Skip to main content

cartog_db/
lib.rs

1//! SQLite persistence layer for the cartog code graph.
2//!
3//! Stores symbols, edges, and file metadata in a single SQLite database.
4//! Provides graph traversal queries (callees, refs, impact, hierarchy),
5//! full-text search via FTS5, vector KNN search via sqlite-vec, and a
6//! 6-tier heuristic edge resolution algorithm.
7#![doc = ""]
8#![doc = include_str!("../README.md")]
9
10use anyhow::{Context, Result};
11use rusqlite::ffi::sqlite3_auto_extension;
12use rusqlite::{params, Connection, OptionalExtension};
13use serde::Serialize;
14use sqlite_vec::sqlite3_vec_init;
15use tracing::{info, warn};
16
17use cartog_core::{Edge, EdgeKind, EdgeProvenance, FileInfo, Symbol, SymbolKind, Visibility};
18
19/// Typed errors for the database-open and schema-migration paths.
20///
21/// The rest of the query API still returns `anyhow::Result` for now;
22/// this enum exists so callers (the binary, MCP server, plugin authors)
23/// can pattern-match on the actionable failure modes around opening a
24/// database — especially distinguishing a corrupt file from a missing
25/// one from a schema incompatibility. A `From<DbError>` impl on
26/// `anyhow::Error` is provided automatically by the trait blanket, so
27/// existing `?`-based call sites keep working unchanged.
28#[derive(Debug, thiserror::Error)]
29pub enum DbError {
30    /// Failure opening or creating the SQLite file itself (permission
31    /// denied, path missing, disk full, etc.).
32    #[error("failed to open database at {path}: {source}")]
33    Open {
34        path: std::path::PathBuf,
35        #[source]
36        source: rusqlite::Error,
37    },
38
39    /// Failure preparing the on-disk layout (e.g. could not create the
40    /// `.cartog/` parent directory).
41    #[error("failed to prepare database directory {path}: {source}")]
42    PrepareDir {
43        path: std::path::PathBuf,
44        #[source]
45        source: std::io::Error,
46    },
47
48    /// Could not apply one of the startup PRAGMAs (journal_mode, WAL, …).
49    #[error("failed to set startup pragmas: {0}")]
50    Pragma(#[source] rusqlite::Error),
51
52    /// Could not apply the `CREATE TABLE IF NOT EXISTS` schema bootstrap.
53    #[error("failed to create schema: {0}")]
54    Schema(#[source] rusqlite::Error),
55
56    /// Could not create or migrate the RAG (FTS + vector) tables.
57    #[error("failed to create RAG schema: {0}")]
58    RagSchema(#[source] rusqlite::Error),
59
60    /// Pre-migration backup via `VACUUM INTO` failed.
61    #[error("failed to back up database before destructive migration to {path}: {source}")]
62    BackupFailed {
63        path: std::path::PathBuf,
64        #[source]
65        source: rusqlite::Error,
66    },
67
68    /// Embedding-dimension reconciliation failed (the stored `symbol_vec`
69    /// shape didn't match the requested one and we couldn't rebuild it).
70    #[error("embedding dimension migration failed: {0}")]
71    EmbeddingDimension(#[source] rusqlite::Error),
72
73    /// Read-only attach found a `schema_version` on disk that this binary
74    /// doesn't know how to query. The primary writer was upgraded to a
75    /// newer cartog; the read-only client should exit cleanly and let the
76    /// user restart against the new version.
77    #[error(
78        "schema_version mismatch: this binary expects {expected}, DB has {stored} \
79         (a different cartog process upgraded the schema; restart this session)"
80    )]
81    SchemaDrift { expected: u32, stored: u32 },
82
83    /// A catch-all for other rusqlite-level failures inside `open` —
84    /// use more specific variants whenever they fit.
85    #[error(transparent)]
86    Sqlite(#[from] rusqlite::Error),
87}
88
89/// Result alias for the typed-error helpers below.
90pub type DbResult<T> = std::result::Result<T, DbError>;
91
92const SQL_INSERT_SYMBOL: &str = "INSERT OR REPLACE INTO symbols
93     (id, name, kind, file_path, start_line, end_line, start_byte, end_byte,
94      parent_id, signature, visibility, is_async, docstring, content_hash, subtree_hash)
95     VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)";
96
97const SQL_INSERT_EDGE: &str = "INSERT INTO edges
98     (source_id, target_name, target_id, kind, file_path, line, resolution_state, resolution_source)
99     VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)";
100
101const SCHEMA: &str = r#"
102CREATE TABLE IF NOT EXISTS symbols (
103    id TEXT PRIMARY KEY,
104    name TEXT NOT NULL,
105    kind TEXT NOT NULL,
106    file_path TEXT NOT NULL,
107    start_line INTEGER,
108    end_line INTEGER,
109    start_byte INTEGER,
110    end_byte INTEGER,
111    parent_id TEXT,
112    signature TEXT,
113    visibility TEXT,
114    is_async BOOLEAN DEFAULT FALSE,
115    docstring TEXT,
116    in_degree INTEGER DEFAULT 0,
117    content_hash TEXT,
118    subtree_hash TEXT
119);
120
121CREATE TABLE IF NOT EXISTS edges (
122    id INTEGER PRIMARY KEY AUTOINCREMENT,
123    source_id TEXT NOT NULL,
124    target_name TEXT NOT NULL,
125    target_id TEXT,
126    kind TEXT NOT NULL,
127    file_path TEXT NOT NULL,
128    line INTEGER,
129    -- 0 = unresolved (heuristic + LSP not yet definitive), 1 = resolved,
130    -- 2 = unresolvable (LSP definitively returned no definition: typo, dyn dispatch, macro),
131    -- 3 = external (LSP located the target outside the indexed root: stdlib, deps, node_modules).
132    resolution_state INTEGER NOT NULL DEFAULT 0,
133    -- Which tier/source resolved target_id (EdgeProvenance::as_str), or NULL for
134    -- unresolved edges and rows resolved before provenance tracking existed.
135    resolution_source TEXT,
136    FOREIGN KEY (source_id) REFERENCES symbols(id)
137);
138
139CREATE TABLE IF NOT EXISTS files (
140    path TEXT PRIMARY KEY,
141    last_modified REAL,
142    hash TEXT,
143    language TEXT,
144    num_symbols INTEGER DEFAULT 0
145);
146
147CREATE TABLE IF NOT EXISTS metadata (
148    key TEXT PRIMARY KEY,
149    value TEXT
150);
151
152-- query_log feeds `cartog stats --savings` / `cartog savings`. One row per
153-- successful read tool call (CLI or MCP). No query payload is stored — just
154-- which tool, when, and the call surface — to keep the local-first promise.
155CREATE TABLE IF NOT EXISTS query_log (
156    id INTEGER PRIMARY KEY AUTOINCREMENT,
157    tool TEXT NOT NULL,
158    source TEXT NOT NULL,
159    ts INTEGER NOT NULL
160);
161
162CREATE INDEX IF NOT EXISTS idx_query_log_tool ON query_log(tool);
163CREATE INDEX IF NOT EXISTS idx_query_log_ts ON query_log(ts);
164
165CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
166CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);
167CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_path);
168CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
169-- Composite: speeds up same-directory edge resolution
170-- (WHERE name = ? AND file_path LIKE ?) in `resolve_edges_pass`.
171CREATE INDEX IF NOT EXISTS idx_symbols_name_file ON symbols(name, file_path);
172CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id);
173CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_name);
174CREATE INDEX IF NOT EXISTS idx_edges_target_id ON edges(target_id);
175CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
176-- Per-file edge delete (clear_file_data_in_tx); without it the DELETE full-scans
177-- edges per file, making --force/first-index O(files×edges). idx_edges_unresolved
178-- is partial (state=0) so it can't serve deletes of resolved edges.
179CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path);
180-- Tier-2 import-path lookups; kind-only index scans all imports edges per call (#109).
181CREATE INDEX IF NOT EXISTS idx_edges_kind_target ON edges(kind, target_name);
182-- idx_edges_unresolved (partial index on resolution_state=0) is created
183-- post-migration in Database::open so pre-v4 DBs without the column don't
184-- blow up at SCHEMA-load time.
185"#;
186
187/// Schema for RAG semantic search tables.
188///
189/// - `symbol_content`: stores raw source code for each symbol (extracted via byte offsets)
190/// - `symbol_fts`: FTS5 virtual table for keyword/BM25 search over symbol names and content
191/// - `symbol_embedding_map`: maps integer rowids (for sqlite-vec) to symbol IDs
192/// - `symbol_vec`: sqlite-vec virtual table for vector KNN search (384-dim float32)
193const RAG_SCHEMA: &str = r#"
194CREATE TABLE IF NOT EXISTS symbol_content (
195    symbol_id TEXT PRIMARY KEY,
196    content TEXT NOT NULL,
197    header TEXT NOT NULL,
198    normalized_name TEXT NOT NULL DEFAULT ''
199);
200
201CREATE VIRTUAL TABLE IF NOT EXISTS symbol_fts USING fts5(
202    symbol_name,
203    normalized_name,
204    content,
205    content=symbol_content,
206    content_rowid=rowid
207);
208
209-- Triggers to keep FTS5 in sync with symbol_content
210CREATE TRIGGER IF NOT EXISTS symbol_content_ai AFTER INSERT ON symbol_content BEGIN
211    INSERT INTO symbol_fts(rowid, symbol_name, normalized_name, content)
212    VALUES (new.rowid, (SELECT name FROM symbols WHERE id = new.symbol_id), new.normalized_name, new.content);
213END;
214
215CREATE TRIGGER IF NOT EXISTS symbol_content_ad AFTER DELETE ON symbol_content BEGIN
216    INSERT INTO symbol_fts(symbol_fts, rowid, symbol_name, normalized_name, content)
217    VALUES ('delete', old.rowid, (SELECT name FROM symbols WHERE id = old.symbol_id), old.normalized_name, old.content);
218END;
219
220CREATE TABLE IF NOT EXISTS symbol_embedding_map (
221    id INTEGER PRIMARY KEY AUTOINCREMENT,
222    symbol_id TEXT NOT NULL UNIQUE
223);
224
225CREATE INDEX IF NOT EXISTS idx_embedding_map_symbol ON symbol_embedding_map(symbol_id);
226"#;
227
228/// Default embedding dimension (BGE-small-en-v1.5).
229pub const DEFAULT_EMBEDDING_DIM: usize = 384;
230
/// Describes which embedding stack produced the vectors held in
/// `symbol_vec`.
///
/// The fingerprint is kept in the `metadata` table so a change of provider
/// or model can be detected even when the vector dimension is unchanged.
/// Comparing dimensions alone is insufficient: two unrelated models may
/// both emit, say, 384-dim vectors (a local BGE model and an Ollama
/// variant), and querying vectors written by one with the other yields
/// meaningless similarity scores.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EmbeddingFingerprint {
    /// Provider family, e.g. `"local"` or `"ollama"`.
    pub provider: String,
    /// Model name within that provider.
    pub model: String,
    /// Length of each embedding vector.
    pub dimension: usize,
}
249
250/// Metadata keys for the embedding fingerprint.
251const EMBED_PROVIDER_KEY: &str = "embedding_provider";
252const EMBED_MODEL_KEY: &str = "embedding_model";
253
/// Build the `CREATE VIRTUAL TABLE` statement for the sqlite-vec table
/// `symbol_vec`, with a single `embedding` column of `dim` float components.
fn rag_vec_schema(dim: usize) -> String {
    let mut ddl = String::from(
        "CREATE VIRTUAL TABLE IF NOT EXISTS symbol_vec USING vec0(embedding float[",
    );
    ddl.push_str(&dim.to_string());
    ddl.push_str("])");
    ddl
}
258
259/// Default directory for cartog-generated artifacts, at the project root.
260/// Holds the SQLite database and its destructive-migration backups.
261pub const DB_DIR: &str = ".cartog";
262
263/// Default SQLite database filename, stored inside [`DB_DIR`].
264pub const DB_FILENAME: &str = "db.sqlite";
265
266/// Legacy database filename at the project root, kept for backwards-compatibility
267/// lookups. Never written to for new projects: use `DB_DIR`/`DB_FILENAME` instead.
268pub const LEGACY_DB_FILE: &str = ".cartog.db";
269
270/// Milliseconds a connection waits on a locked database before giving up.
271///
272/// WAL removes reader-vs-writer contention but not writer-vs-writer or
273/// reader-vs-checkpoint contention. Without a `busy_timeout` SQLite fails
274/// immediately with `SQLITE_BUSY`; this gives bounded retry instead. Applied
275/// to every on-disk connection.
276pub const BUSY_TIMEOUT_MS: u32 = 5000;
277
278#[cfg(test)]
279thread_local! {
280    /// Test-only fault injection: when set to true, `reconcile_embedding_fingerprint`
281    /// returns SQLITE_FULL between the model write and the dimension write.
282    /// Cleared (swapped to false) on read so each fire is one-shot.
283    static RECONCILE_FAIL_AFTER_MODEL: std::sync::atomic::AtomicBool =
284        const { std::sync::atomic::AtomicBool::new(false) };
285}
286
287/// Run `PRAGMA wal_checkpoint(TRUNCATE)` on the SQLite file at `path`.
288/// No-op for missing files. Used before moving the DB to flush the WAL.
289pub fn checkpoint_wal(path: &std::path::Path) -> anyhow::Result<()> {
290    use anyhow::Context;
291    if !path.exists() {
292        return Ok(());
293    }
294    let conn = Connection::open(path)
295        .with_context(|| format!("open {} for WAL checkpoint", path.display()))?;
296    conn.execute_batch(&format!(
297        "PRAGMA busy_timeout={BUSY_TIMEOUT_MS};
298         PRAGMA wal_checkpoint(TRUNCATE);"
299    ))
300    .with_context(|| format!("PRAGMA wal_checkpoint(TRUNCATE) on {}", path.display()))?;
301    Ok(())
302}
303
304/// Maximum number of results returned by [`Database::search`].
305/// Enforced here and referenced by CLI and MCP layers.
306pub const MAX_SEARCH_LIMIT: u32 = 100;
307
/// Break a symbol name into space-separated lowercase words for FTS5
/// indexing.
///
/// Word boundaries are recognised for camelCase, PascalCase, snake_case,
/// SCREAMING_SNAKE_CASE and mixtures of them. Any character that is not
/// alphanumeric acts as a separator and is dropped.
///
/// | input                | output                  |
/// |----------------------|-------------------------|
/// | `validateToken`      | `"validate token"`      |
/// | `DatabaseConnection` | `"database connection"` |
/// | `validate_token`     | `"validate token"`      |
/// | `TOKEN_EXPIRY`       | `"token expiry"`        |
/// | `getHTTPResponse`    | `"get http response"`   |
/// | `__init__`           | `"init"`                |
pub fn normalize_symbol_name(name: &str) -> String {
    /// Move the word under construction (if any) into the finished list.
    fn flush(finished: &mut Vec<String>, pending: &mut String) {
        if !pending.is_empty() {
            finished.push(std::mem::take(pending));
        }
    }

    let letters: Vec<char> = name.chars().collect();
    let mut finished: Vec<String> = Vec::new();
    let mut pending = String::new();

    for (idx, &ch) in letters.iter().enumerate() {
        match ch {
            '_' | '-' => flush(&mut finished, &mut pending),
            _ if ch.is_uppercase() => {
                // A non-empty pending word guarantees idx >= 1, so the
                // look-behind index cannot underflow.
                let in_word = !pending.is_empty();
                let after_lower = in_word && letters[idx - 1].is_lowercase();
                let before_lower =
                    matches!(letters.get(idx + 1), Some(next) if next.is_lowercase());

                // Split either at a camelCase hump (`validateT`) or where an
                // acronym run hands over to a capitalised word
                // (`HTTPResponse` splits before `R`).
                if after_lower || (in_word && before_lower) {
                    flush(&mut finished, &mut pending);
                }
                pending.extend(ch.to_lowercase());
            }
            _ if ch.is_alphanumeric() => pending.extend(ch.to_lowercase()),
            // Every other character is a separator.
            _ => flush(&mut finished, &mut pending),
        }
    }
    flush(&mut finished, &mut pending);

    finished.join(" ")
}
363
364pub struct Database {
365    conn: Connection,
366    /// Set when this `Database` was opened via [`Database::open_readonly`].
367    /// Captures the `metadata` snapshot at attach time so a later promotion
368    /// (Phase 5) can detect drift before switching to read-write mode. `None`
369    /// for read-write opens.
370    ///
371    /// Invariant: `pinned.is_some() == is_read_only()`. Both flow from the
372    /// same opening path, and the equivalence is what callers rely on.
373    pinned: Option<PinnedAttach>,
374}
375
376/// Snapshot of write-mode-relevant metadata captured by a read-only attach.
377/// Compared against the on-disk values when the reader decides whether it
378/// can still safely serve queries against the DB.
379#[derive(Debug, Clone, PartialEq, Eq)]
380pub struct PinnedAttach {
381    pub schema_version: u32,
382    pub embedding: Option<EmbeddingFingerprint>,
383}
384
385impl std::fmt::Debug for Database {
386    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
387        f.debug_struct("Database").finish_non_exhaustive()
388    }
389}
390
391/// Register the sqlite-vec extension globally.
392///
393/// Must be called once before opening any database connections.
394/// Safe to call multiple times (idempotent via `std::sync::Once`).
395pub fn register_sqlite_vec() {
396    use std::sync::Once;
397    static INIT: Once = Once::new();
398    INIT.call_once(|| unsafe {
399        #[allow(clippy::missing_transmute_annotations)]
400        sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ())));
401    });
402}
403
404/// Current schema version. Increment when adding migrations.
405const SCHEMA_VERSION: u32 = 7;
406
407/// Public mirror of the private `SCHEMA_VERSION` for callers outside this crate
408/// (e.g. `cartog pull` needs it to compare against a pulled DB and refuse
409/// to load a future-versioned file). Kept in sync by construction.
410pub const CURRENT_SCHEMA_VERSION: u32 = SCHEMA_VERSION;
411
412/// Read the `schema_version` recorded in a cartog SQLite file at `path`,
413/// without going through the full [`Database::open`] machinery (no
414/// migrations, no fingerprint reconciliation). Used by `cartog pull` to
415/// guard against pulling a future-versioned DB before clobbering the
416/// local one.
417///
418/// Returns `Ok(0)` when the file exists but is not a cartog DB (no
419/// `metadata` table, or no `schema_version` row). Returns `Err` only on
420/// genuine SQLite errors (corrupt file, permission denied, etc.).
421pub fn read_schema_version_at(path: &std::path::Path) -> anyhow::Result<u32> {
422    use anyhow::Context;
423    let conn = Connection::open_with_flags(
424        path,
425        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI,
426    )
427    .with_context(|| format!("open {} read-only for schema check", path.display()))?;
428    Ok(read_schema_version(&conn)?)
429}
430
431/// Read a single `metadata` value by key from a cartog SQLite file at `path`,
432/// without the full [`Database::open`] machinery. Mirrors
433/// [`read_schema_version_at`]; used by `cartog push`/`pull` to read the
434/// `last_commit` provenance row off a closed DB file.
435///
436/// Returns `Ok(None)` when the file is a cartog DB but lacks the row, or when
437/// it has no `metadata` table at all (not a cartog DB). Returns `Err` only on
438/// genuine SQLite errors (corrupt file, permission denied, etc.).
439pub fn read_metadata_at(path: &std::path::Path, key: &str) -> anyhow::Result<Option<String>> {
440    use anyhow::Context;
441    let conn = Connection::open_with_flags(
442        path,
443        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI,
444    )
445    .with_context(|| format!("open {} read-only for metadata read", path.display()))?;
446    match conn.query_row(
447        "SELECT value FROM metadata WHERE key = ?1",
448        rusqlite::params![key],
449        |row| row.get::<_, Option<String>>(0),
450    ) {
451        // Row present; value may be a string or SQL NULL (a corrupt/hand-edited
452        // row) — both collapse to "no usable value", same as a missing row.
453        Ok(v) => Ok(v),
454        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
455        // Missing `metadata` table entirely (non-cartog SQLite file): treat as
456        // absent rather than an error, matching read_schema_version's stored=0.
457        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
458            if msg.contains("no such table: metadata") =>
459        {
460            Ok(None)
461        }
462        Err(e) => Err(e).with_context(|| format!("read metadata[{key}] from {}", path.display())),
463    }
464}
465
466/// True when the `symbol_vec` virtual table exists in the open DB. Used by
467/// the fast-path early returns in [`handle_embedding_dimension`] and
468/// [`Database::reconcile_embedding_fingerprint`] so a previously-corrupted
469/// DB (table dropped externally, or a pre-C4 cartog that crashed between
470/// DROP and CREATE) is detected and rebuilt instead of silently passing
471/// the metadata-only check.
472fn symbol_vec_exists(conn: &Connection) -> std::result::Result<bool, rusqlite::Error> {
473    conn.query_row(
474        "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name='symbol_vec'",
475        [],
476        |row| row.get::<_, i64>(0),
477    )
478    .optional()
479    .map(|v| v.is_some())
480}
481
482/// Read the on-disk `schema_version` for the read-only open paths.
483/// A missing row (or missing `metadata` table — a non-cartog SQLite file
484/// at the path) is treated as `stored = 0`, which surfaces to the caller
485/// as `DbError::SchemaDrift { expected, stored: 0 }` rather than a raw
486/// rusqlite error. Lets `cartog serve` print "another writer upgraded the
487/// schema; restart this session" (the actionable message) instead of
488/// "Query returned no rows" or "no such table: metadata".
489fn read_schema_version(conn: &Connection) -> std::result::Result<u32, DbError> {
490    match conn.query_row(
491        "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
492        [],
493        |row| row.get::<_, u32>(0),
494    ) {
495        Ok(v) => Ok(v),
496        // Missing row inside an existing table: stored=0.
497        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
498        // Missing `metadata` table entirely (non-cartog SQLite file at the
499        // path, or a partially-initialised DB): stored=0. rusqlite reports
500        // this as a generic SqliteFailure; the message is the only stable
501        // signal for "no such table" specifically.
502        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
503            if msg.contains("no such table: metadata") =>
504        {
505            Ok(0)
506        }
507        Err(e) => Err(DbError::Sqlite(e)),
508    }
509}
510
511/// Run schema migrations for existing databases.
512///
513/// Uses the `metadata` table to track the current schema version.
514/// Each migration runs once and is idempotent. New databases start at
515/// the latest version (SCHEMA already includes all columns).
516fn migrate(conn: &Connection) {
517    let current: u32 = conn
518        .query_row(
519            "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
520            [],
521            |row| row.get(0),
522        )
523        .unwrap_or(1); // pre-versioning databases are version 1
524
525    // Check for partially-migrated v3: schema version bumped but columns missing.
526    // Must run BEFORE the early return since current may already be >= SCHEMA_VERSION.
527    let has_hash_cols = conn
528        .prepare("SELECT content_hash FROM symbols LIMIT 0")
529        .is_ok();
530    // Same idea for v4: ensure the resolution_state column exists even if
531    // schema_version was already bumped (e.g. partial migration crash).
532    let has_resolution_state = conn
533        .prepare("SELECT resolution_state FROM edges LIMIT 0")
534        .is_ok();
535    // Same idea for v5: ensure query_log exists even on partial migration.
536    let has_query_log = conn.prepare("SELECT 1 FROM query_log LIMIT 0").is_ok();
537    // Same idea for v6: ensure the resolution_source column exists.
538    let has_resolution_source = conn
539        .prepare("SELECT resolution_source FROM edges LIMIT 0")
540        .is_ok();
541
542    if current >= SCHEMA_VERSION
543        && has_hash_cols
544        && has_resolution_state
545        && has_query_log
546        && has_resolution_source
547    {
548        return;
549    }
550
551    // Fresh-DB fast path: the SCHEMA bootstrap just created every table at the
552    // current shape, so all columns/tables exist but no schema_version row is
553    // stamped yet (current was read as 1 via unwrap_or). Stamp the version and
554    // skip the ladder, avoiding the needless v2→3 wipe and the
555    // resolution_source "duplicate column" WARN on every fresh open.
556    // Require an empty symbols table AND all four probes: a real pre-versioning
557    // v1 DB has rows, and a crash-mid-migration DB is missing a column, so
558    // neither is misclassified as fresh.
559    let no_version_row = conn
560        .query_row(
561            "SELECT 1 FROM metadata WHERE key = 'schema_version'",
562            [],
563            |_| Ok(()),
564        )
565        .is_err();
566    let symbols_empty = conn
567        .query_row("SELECT COUNT(*) FROM symbols", [], |r| r.get::<_, i64>(0))
568        .map(|c| c == 0)
569        .unwrap_or(false);
570    if no_version_row
571        && symbols_empty
572        && has_hash_cols
573        && has_resolution_state
574        && has_query_log
575        && has_resolution_source
576    {
577        if let Err(e) = conn.execute(
578            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?1)",
579            params![SCHEMA_VERSION.to_string()],
580        ) {
581            warn!(error = %e, "failed to stamp fresh-DB schema version");
582        }
583        return;
584    }
585
586    // Migration 1 → 2: add in_degree column for centrality ranking
587    if current < 2 {
588        let _ = conn.execute(
589            "ALTER TABLE symbols ADD COLUMN in_degree INTEGER DEFAULT 0",
590            [],
591        );
592    }
593
594    // Migration 2 → 3: stable symbol IDs + Merkle hash columns.
595    if current < 3 || !has_hash_cols {
596        info!("schema v3: stable symbol IDs — clearing index for full rebuild");
597        let _ = conn.execute("ALTER TABLE symbols ADD COLUMN content_hash TEXT", []);
598        let _ = conn.execute("ALTER TABLE symbols ADD COLUMN subtree_hash TEXT", []);
599        // Clear all indexed data so next index rebuilds with stable IDs
600        for table in &["symbol_content", "edges", "symbols", "files"] {
601            let _ = conn.execute(&format!("DELETE FROM {table}"), []);
602        }
603        // Clear RAG data too — vector table first, then map
604        let _ = conn.execute("DELETE FROM symbol_vec", []);
605        let _ = conn.execute("DELETE FROM symbol_embedding_map", []);
606        // Clear last_commit so incremental indexing doesn't skip anything
607        let _ = conn.execute("DELETE FROM metadata WHERE key = 'last_commit'", []);
608    }
609
610    // Migration 3 → 4: edge resolution_state for the LSP "unresolvable" marker.
611    // Non-destructive: column is additive, existing nulls become state=0
612    // (will be re-attempted by LSP), existing target_ids become state=1.
613    // The matching partial index is created in `Database::open` after this
614    // function returns — keeps the SCHEMA bootstrap pre-migration safe.
615    if current < 4 || !has_resolution_state {
616        info!("schema v4: adding edges.resolution_state column");
617        let _ = conn.execute(
618            "ALTER TABLE edges ADD COLUMN resolution_state INTEGER NOT NULL DEFAULT 0",
619            [],
620        );
621        let _ = conn.execute(
622            "UPDATE edges SET resolution_state = 1 WHERE target_id IS NOT NULL",
623            [],
624        );
625    }
626
627    // Migration 4 → 5: query_log table for `cartog stats --savings`.
628    // Additive only; the SCHEMA bootstrap above already runs `CREATE TABLE IF
629    // NOT EXISTS query_log`, so this branch is just the version bump for
630    // databases that ran through `migrate()` on a pre-v5 binary.
631    if current < 5 || !has_query_log {
632        info!("schema v5: query_log table");
633        let _ = conn.execute(
634            "CREATE TABLE IF NOT EXISTS query_log (
635                id INTEGER PRIMARY KEY AUTOINCREMENT,
636                tool TEXT NOT NULL,
637                source TEXT NOT NULL,
638                ts INTEGER NOT NULL
639            )",
640            [],
641        );
642        let _ = conn.execute(
643            "CREATE INDEX IF NOT EXISTS idx_query_log_tool ON query_log(tool)",
644            [],
645        );
646        let _ = conn.execute(
647            "CREATE INDEX IF NOT EXISTS idx_query_log_ts ON query_log(ts)",
648            [],
649        );
650    }
651
652    // Migration 5 → 6: edges.resolution_source records WHICH tier/source resolved
653    // each edge. Additive, nullable. Pre-v6 resolved edges have an indistinguishable
654    // tier, so they stay NULL ("unknown / pre-provenance") rather than guess a sentinel.
655    if current < 6 || !has_resolution_source {
656        info!("schema v6: adding edges.resolution_source column");
657        // Surface a failed ALTER (matches the schema-version write below): the
658        // probe guard re-runs the migration on the next open, so this is logged
659        // rather than fatal, consistent with the other additive migrations.
660        if let Err(e) = conn.execute("ALTER TABLE edges ADD COLUMN resolution_source TEXT", []) {
661            warn!(error = %e, "failed to add edges.resolution_source column");
662        }
663    }
664
665    // Migration 6 → 7: symbol-ID leaf-name escaping for injectivity.
666    // The ID format gained separator-escaping for composite leaf names (dotted
667    // import paths, `.`/`:`-bearing markdown headings) so distinct symbols can no
668    // longer collide to one ID. Existing rows carry the old (collidable) IDs, so
669    // clear the index for a full rebuild — mirrors the v2→3 stable-ID wipe.
670    if current < 7 {
671        info!("schema v7: symbol-ID escaping — clearing index for full rebuild");
672        for table in &["symbol_content", "edges", "symbols", "files"] {
673            let _ = conn.execute(&format!("DELETE FROM {table}"), []);
674        }
675        let _ = conn.execute("DELETE FROM symbol_vec", []);
676        let _ = conn.execute("DELETE FROM symbol_embedding_map", []);
677        let _ = conn.execute("DELETE FROM metadata WHERE key = 'last_commit'", []);
678    }
679
680    // Store the new schema version
681    if let Err(e) = conn.execute(
682        "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?1)",
683        params![SCHEMA_VERSION.to_string()],
684    ) {
685        warn!(error = %e, "failed to store schema version");
686    }
687}
688
689/// Retry backoff schedule for writes that race with another writer on the
690/// embedding-dimension migration. Multiple cartog processes can each call
691/// `Database::open` and contend on the same DB; `PRAGMA busy_timeout` only
692/// covers single statements, not the full sequence here. Exhausting the
693/// schedule (~2s total) returns the underlying error unchanged.
694const MIGRATION_RETRY_BACKOFF_MS: &[u64] = &[50, 100, 250, 500, 1000];
695
696/// Run a fallible rusqlite operation, retrying on `SQLITE_BUSY` /
697/// `SQLITE_LOCKED` with the [`MIGRATION_RETRY_BACKOFF_MS`] schedule.
698fn retry_busy<T, F>(mut op: F) -> std::result::Result<T, rusqlite::Error>
699where
700    F: FnMut() -> std::result::Result<T, rusqlite::Error>,
701{
702    let mut attempt = 0usize;
703    loop {
704        match op() {
705            Ok(v) => return Ok(v),
706            Err(e) => {
707                let busy = matches!(
708                    e,
709                    rusqlite::Error::SqliteFailure(
710                        rusqlite::ffi::Error {
711                            code: rusqlite::ErrorCode::DatabaseBusy
712                                | rusqlite::ErrorCode::DatabaseLocked,
713                            ..
714                        },
715                        _
716                    )
717                );
718                if !busy || attempt >= MIGRATION_RETRY_BACKOFF_MS.len() {
719                    return Err(e);
720                }
721                let delay_ms = MIGRATION_RETRY_BACKOFF_MS[attempt];
722                tracing::debug!(
723                    attempt = attempt + 1,
724                    delay_ms,
725                    "retrying embedding-dimension write after SQLITE_BUSY"
726                );
727                std::thread::sleep(std::time::Duration::from_millis(delay_ms));
728                attempt += 1;
729            }
730        }
731    }
732}
733
734/// Check stored embedding dimension against requested dimension.
735/// If they differ, drop the vector table and clear the embedding map.
736///
737/// Returns rusqlite's `Result` so the caller (`Database::open`) can wrap
738/// any failure into `DbError::EmbeddingDimension` with precise context.
739///
740/// Writes are wrapped in [`retry_busy`] so a concurrent writer on the
741/// same DB (another cartog process) doesn't crash this `Database::open`
742/// with `SQLITE_BUSY`. When the stored dimension already matches the
743/// effective one, the function returns without any DB writes at all.
744fn handle_embedding_dimension(
745    conn: &Connection,
746    requested_dim: usize,
747) -> std::result::Result<(), rusqlite::Error> {
748    let stored_dim: Option<usize> = conn
749        .query_row(
750            "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'",
751            [],
752            |row| row.get::<_, i64>(0).map(|v| v as usize),
753        )
754        .ok();
755
756    // When the caller passes the default dimension and a different dimension is
757    // already stored, preserve the stored one. This avoids non-RAG commands
758    // (which don't know the real provider dimension) from silently wiping a
759    // vector index created by an Ollama provider with auto-detected dimension.
760    let effective_dim = match stored_dim {
761        Some(old) if requested_dim == DEFAULT_EMBEDDING_DIM && old != DEFAULT_EMBEDDING_DIM => old,
762        _ => requested_dim,
763    };
764
765    // True early return: if the dim already matches AND the vector table
766    // actually exists, nothing to write. The dim+table pair is the real
767    // invariant; checking metadata alone misses the case where a previous
768    // open crashed mid-migration and left the DB without `symbol_vec`
769    // while metadata still claims a dimension.
770    if stored_dim == Some(effective_dim) && symbol_vec_exists(conn)? {
771        return Ok(());
772    }
773
774    // Wrap the wipe+rebuild sequence in a single transaction so a mid-
775    // sequence failure (busy timeout exhausted, disk full, etc.) rolls
776    // back atomically. Without this, a DROP that succeeds followed by an
777    // INSERT that fails would leave the DB with no `symbol_vec` but
778    // metadata pointing at the old dimension — the next open would skip
779    // migration ("stored == requested") and queries against the missing
780    // table would error forever.
781    let schema = rag_vec_schema(effective_dim);
782    let needs_wipe = stored_dim.is_some();
783    retry_busy(|| {
784        let tx = conn.unchecked_transaction()?;
785        if needs_wipe {
786            let old_dim = stored_dim.unwrap_or(0);
787            tracing::warn!(
788                old = old_dim,
789                new = effective_dim,
790                "Embedding dimension changed — clearing vector index. Run `cartog rag index` to re-embed."
791            );
792            tx.execute("DROP TABLE IF EXISTS symbol_vec", [])?;
793            tx.execute("DELETE FROM symbol_embedding_map", [])?;
794        }
795        tx.execute_batch(&schema)?;
796        tx.execute(
797            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('embedding_dimension', ?1)",
798            params![effective_dim.to_string()],
799        )?;
800        tx.commit()
801    })?;
802
803    Ok(())
804}
805
806/// If the next migration will wipe existing data, copy the database to a
807/// timestamped backup file first. No-op for in-memory or empty databases.
808fn backup_before_destructive_migration(
809    conn: &Connection,
810    db_path: &std::path::Path,
811) -> DbResult<()> {
812    let current: u32 = conn
813        .query_row(
814            "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
815            [],
816            |row| row.get(0),
817        )
818        .unwrap_or(1);
819    let has_hash_cols = conn
820        .prepare("SELECT content_hash FROM symbols LIMIT 0")
821        .is_ok();
822
823    // Mirrors the destructive conditions in `migrate()`: the 2→3 stable-id wipe
824    // (`current < 3 || !has_hash_cols`) and the 6→7 symbol-id-escaping wipe
825    // (`current < 7`). Either clears every indexed row, so back up first.
826    let will_wipe = current < 7 || !has_hash_cols;
827    if !will_wipe {
828        return Ok(());
829    }
830
831    // Back up if ANY wiped table holds data, not just `symbols`: a partially
832    // indexed DB (e.g. edges/content written before symbols) would otherwise
833    // skip the backup and lose those rows to the wipe. A missing table errors
834    // the EXISTS probe, which `unwrap_or(false)` treats as empty.
835    let has_rows = |table: &str| -> bool {
836        conn.query_row(&format!("SELECT EXISTS(SELECT 1 FROM {table})"), [], |r| {
837            r.get::<_, bool>(0)
838        })
839        .unwrap_or(false)
840    };
841    let any_indexed = [
842        "symbols",
843        "edges",
844        "files",
845        "symbol_content",
846        "symbol_embedding_map",
847    ]
848    .iter()
849    .any(|t| has_rows(t));
850    if !any_indexed {
851        return Ok(());
852    }
853
854    // Skip in-memory / URI-mode databases — nothing to back up.
855    let path_str = db_path.to_string_lossy();
856    if path_str.is_empty() || path_str == ":memory:" || path_str.starts_with("file:") {
857        return Ok(());
858    }
859
860    let ts = std::time::SystemTime::now()
861        .duration_since(std::time::UNIX_EPOCH)
862        .map(|d| d.as_secs())
863        .unwrap_or(0);
864    let mut backup_os = db_path.as_os_str().to_os_string();
865    backup_os.push(format!(".pre-v{current}-{ts}.bak"));
866    let backup_path = std::path::PathBuf::from(backup_os);
867
868    // VACUUM INTO produces a consistent copy, safe alongside WAL.
869    // Escape any single-quotes in the path literal.
870    let escaped = backup_path.to_string_lossy().replace('\'', "''");
871    conn.execute(&format!("VACUUM INTO '{escaped}'"), [])
872        .map_err(|source| DbError::BackupFailed {
873            path: backup_path.clone(),
874            source,
875        })?;
876
877    let symbol_count: i64 = conn
878        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
879        .unwrap_or(0);
880    info!(
881        backup = %backup_path.display(),
882        old_version = current,
883        new_version = SCHEMA_VERSION,
884        symbols = symbol_count,
885        "schema migration will clear indexed data — created backup"
886    );
887
888    Ok(())
889}
890
891// The `Database` inherent impl is split across `store/` submodules for
892// navigability; each file holds one cohesive cluster of methods.
893mod store;
894pub use store::queries::PathHop;
895pub use store::rag::KindScope;
896
/// An edge read back from the database that has no resolved target yet
/// (consumed by LSP resolution).
#[derive(Debug, Clone)]
pub struct UnresolvedEdge {
    /// Identifier of the edge — presumably its row id in the `edges` table;
    /// confirm against the query that builds this struct.
    pub edge_id: i64,
    /// Name of the target the edge refers to, as recorded on the edge.
    pub target_name: String,
    /// File path recorded on the edge.
    pub file_path: String,
    /// Line number recorded on the edge.
    pub line: u32,
}
905
906#[derive(Debug, Clone, Serialize, schemars::JsonSchema)]
907pub struct IndexStats {
908    pub num_files: u32,
909    pub num_symbols: u32,
910    pub num_edges: u32,
911    pub num_resolved: u32,
912    /// Edges at `resolution_state = 2` (LSP definitively gave up: typo, dyn dispatch, macro).
913    pub num_unresolvable: u32,
914    /// Edges at `resolution_state = 3` (LSP located the target outside the indexed root).
915    pub num_external: u32,
916    pub languages: Vec<(String, u32)>,
917    pub symbol_kinds: Vec<(String, u32)>,
918}
919
920/// Per-tool query counts + token-savings estimate for `cartog stats --savings`.
921///
922/// Carries both sides of the comparison (cartog vs grep+read) so the CLI can
923/// render a "with / without / saved" breakdown that's actually informative —
924/// the flat delta on its own under-explains where the number comes from.
925#[derive(Debug, Clone, Serialize)]
926pub struct SavingsReport {
927    /// `(tool_name, count)` sorted by count descending, then tool name.
928    pub by_tool: Vec<(String, u64)>,
929    /// `(source, count)` for `"cli"` / `"mcp"`.
930    pub by_source: Vec<(String, u64)>,
931    /// Sum of all per-tool counts.
932    pub total_queries: u64,
933    /// Estimated tokens cartog used for `total_queries` reads.
934    pub tokens_used_cartog: u64,
935    /// Estimated tokens an equivalent grep+read flow would have used.
936    pub tokens_used_grep: u64,
937    /// `tokens_used_grep - tokens_used_cartog`. Same as the old
938    /// `estimated_tokens_saved` field; kept for JSON back-compat.
939    pub estimated_tokens_saved: u64,
940    /// Integer percent of `tokens_used_grep` saved (0–99). Caps at 99 so
941    /// the bar never visually flat-tops at 100% on degenerate data.
942    pub percent_saved: u8,
943    /// Per-query baseline token delta (grep − cartog). Exposed so the CLI
944    /// can name the figure in the footer.
945    pub baseline_delta: u32,
946}
947
/// Tokens cartog spends per query. Measured at roughly 280 tokens for a
/// typical navigation query (`where is X used?`, `what does X call?`), with
/// the structured response payload included.
pub const TOKENS_PER_QUERY_CARTOG: u32 = 280;

/// Tokens an equivalent grep + read flow spends per query. Measured: a grep
/// sweep plus reading the ~50 lines around each hit averages about 1,700
/// tokens to answer the same navigation question.
pub const TOKENS_PER_QUERY_GREP: u32 = 1_700;

/// Tokens saved per query (`grep − cartog`). Deliberately coarse: a per-tool
/// figure would need richer per-call accounting and isn't worth it pre-v1.
/// Sources: benchmarks/queries.rs (see `crates/cartog/benches/`).
pub const TOKENS_SAVED_PER_QUERY: u32 = TOKENS_PER_QUERY_GREP - TOKENS_PER_QUERY_CARTOG;
962
/// Set once, the first time `log_query` fails, so that a loud error is
/// surfaced for a persistently broken `query_log` (SQLITE_FULL, missing
/// table) even when `warn!` output is filtered. Deliberately process-wide:
/// the aim is one user-visible message per cartog invocation, not one per
/// row.
static LOG_QUERY_FAILURE_REPORTED: std::sync::atomic::AtomicBool =
    std::sync::atomic::AtomicBool::new(false);
969
970/// Zero-state [`SavingsReport`] used when no queries have been logged yet
971/// (or when the `query_log` table is missing on a read-only attach).
972fn empty_savings_report() -> SavingsReport {
973    SavingsReport {
974        by_tool: Vec::new(),
975        by_source: Vec::new(),
976        total_queries: 0,
977        tokens_used_cartog: 0,
978        tokens_used_grep: 0,
979        estimated_tokens_saved: 0,
980        percent_saved: 0,
981        baseline_delta: TOKENS_SAVED_PER_QUERY,
982    }
983}
984
985/// Returns true when a rusqlite error specifically indicates a missing table,
986/// not any other prepare failure. Used by `savings_breakdown` to distinguish
987/// "query_log doesn't exist yet" (return empty report) from real DB faults
988/// (propagate).
989fn is_no_such_table(e: &rusqlite::Error) -> bool {
990    // SQLite raises SQLITE_ERROR (primary code 1) with a message starting
991    // "no such table: <name>". Match on the variant + the message inside it
992    // rather than `e.to_string()` so a future change to rusqlite's Display
993    // wrapper doesn't break the dispatch silently.
994    matches!(
995        e,
996        rusqlite::Error::SqliteFailure(_, Some(msg)) if msg.contains("no such table")
997    )
998}
999
1000// ── Row Mapping Helpers ──
1001
1002fn row_to_symbol(row: &rusqlite::Row<'_>) -> rusqlite::Result<Symbol> {
1003    row_to_symbol_offset(row, 0)
1004}
1005
1006fn row_to_symbol_offset(row: &rusqlite::Row<'_>, off: usize) -> rusqlite::Result<Symbol> {
1007    let kind_str = row.get::<_, String>(off + 2)?;
1008    let kind = kind_str.parse().unwrap_or_else(|_| {
1009        warn!(kind = %kind_str, "unknown symbol kind, defaulting to variable");
1010        SymbolKind::Variable
1011    });
1012
1013    let vis_str = row.get::<_, Option<String>>(off + 10)?.unwrap_or_default();
1014
1015    Ok(Symbol {
1016        id: row.get(off)?,
1017        name: row.get(off + 1)?,
1018        kind,
1019        file_path: row.get(off + 3)?,
1020        start_line: row.get(off + 4)?,
1021        end_line: row.get(off + 5)?,
1022        start_byte: row.get(off + 6)?,
1023        end_byte: row.get(off + 7)?,
1024        parent_id: row.get(off + 8)?,
1025        signature: row.get(off + 9)?,
1026        visibility: Visibility::from_str_lossy(&vis_str),
1027        is_async: row.get(off + 11)?,
1028        docstring: row.get(off + 12)?,
1029        in_degree: row.get(off + 13).unwrap_or(0),
1030        content_hash: row.get(off + 14).unwrap_or(None),
1031        subtree_hash: row.get(off + 15).unwrap_or(None),
1032    })
1033}
1034
/// Given exactly two global matches, try to pick one of them unambiguously.
/// Last-resort heuristic — only reached after the same-file, import-path,
/// same-directory, and parent-scope tiers have all failed.
///
/// Each argument is a pair whose second element is the symbol kind string
/// and whose first element is what gets returned (presumably the symbol id —
/// confirm against the caller). The pair with the higher [`kind_priority`]
/// wins; equal priorities yield `None`.
///
/// Patterns:
/// - type def vs method (Java/TS constructor shares class name) → prefer type def
/// - function vs method (Ruby/Go top-level fn vs module method) → prefer function
fn disambiguate_two<'a>(a: &'a (String, String), b: &'a (String, String)) -> Option<&'a String> {
    let (first, first_kind) = a;
    let (second, second_kind) = b;
    let first_rank = kind_priority(first_kind);
    let second_rank = kind_priority(second_kind);

    if first_rank > second_rank {
        Some(first)
    } else if second_rank > first_rank {
        Some(second)
    } else {
        None
    }
}

/// Rank of a symbol kind for disambiguation; a higher rank is preferred.
/// Type definitions rank 3, functions 2, methods 1, and every other kind 0.
/// Only differing ranks lead to a pick; equal ranks → no resolution.
fn kind_priority(kind: &str) -> u8 {
    const TYPE_DEFINITION_KINDS: [&str; 5] = ["class", "interface", "enum", "type_alias", "trait"];

    if TYPE_DEFINITION_KINDS.contains(&kind) {
        3
    } else if kind == "function" {
        2
    } else if kind == "method" {
        1
    } else {
        0
    }
}
1060
1061/// Build an [`Edge`] from six consecutive columns starting at `base`:
1062/// `source_id, target_name, target_id, kind, file_path, line, resolution_source`.
1063///
1064/// Shared by every edge-returning query so the field reads, the warn-on-unknown
1065/// decode, and the column ordering stay in one place. Callers that prepend an
1066/// `id` column pass `base = 1`; the bare-projection impact CTE passes `base = 0`.
1067fn edge_from_row(row: &rusqlite::Row<'_>, base: usize) -> rusqlite::Result<Edge> {
1068    let kind_str = row.get::<_, String>(base + 3)?;
1069    let kind = kind_str.parse().unwrap_or_else(|_| {
1070        warn!(kind = %kind_str, "unknown edge kind, defaulting to references");
1071        EdgeKind::References
1072    });
1073
1074    let provenance = match row.get::<_, Option<String>>(base + 6)? {
1075        Some(s) => s.parse::<EdgeProvenance>().ok().or_else(|| {
1076            warn!(source = %s, "unknown edge provenance, dropping to None");
1077            None
1078        }),
1079        None => None,
1080    };
1081
1082    Ok(Edge {
1083        source_id: row.get(base)?,
1084        target_name: row.get(base + 1)?,
1085        target_id: row.get(base + 2)?,
1086        kind,
1087        file_path: row.get(base + 4)?,
1088        line: row.get(base + 5)?,
1089        provenance,
1090    })
1091}
1092
1093fn row_to_edge(row: &rusqlite::Row<'_>) -> rusqlite::Result<Edge> {
1094    edge_from_row(row, 1)
1095}
1096
1097#[cfg(test)]
1098mod tests {
1099    use super::*;
1100
1101    fn test_symbol(name: &str, kind: SymbolKind, file: &str, line: u32) -> Symbol {
1102        Symbol::new(name, kind, file, line, line + 5, 0, 100, None)
1103    }
1104
1105    // ── normalize_symbol_name tests ──
1106
1107    #[test]
1108    fn test_normalize_snake_case() {
1109        assert_eq!(normalize_symbol_name("validate_token"), "validate token");
1110        assert_eq!(
1111            normalize_symbol_name("get_current_user"),
1112            "get current user"
1113        );
1114        assert_eq!(normalize_symbol_name("_private_method"), "private method");
1115        assert_eq!(normalize_symbol_name("__init__"), "init");
1116    }
1117
1118    #[test]
1119    fn test_normalize_camel_case() {
1120        assert_eq!(normalize_symbol_name("validateToken"), "validate token");
1121        assert_eq!(normalize_symbol_name("getCurrentUser"), "get current user");
1122        assert_eq!(normalize_symbol_name("findByToken"), "find by token");
1123    }
1124
1125    #[test]
1126    fn test_normalize_pascal_case() {
1127        assert_eq!(
1128            normalize_symbol_name("DatabaseConnection"),
1129            "database connection"
1130        );
1131        assert_eq!(normalize_symbol_name("AuthService"), "auth service");
1132        assert_eq!(normalize_symbol_name("TokenError"), "token error");
1133    }
1134
1135    #[test]
1136    fn test_normalize_screaming_snake() {
1137        assert_eq!(normalize_symbol_name("TOKEN_EXPIRY"), "token expiry");
1138        assert_eq!(normalize_symbol_name("MAX_RETRY_COUNT"), "max retry count");
1139    }
1140
1141    #[test]
1142    fn test_normalize_acronyms() {
1143        assert_eq!(
1144            normalize_symbol_name("getHTTPResponse"),
1145            "get http response"
1146        );
1147        assert_eq!(normalize_symbol_name("parseJSON"), "parse json");
1148        assert_eq!(normalize_symbol_name("HTMLParser"), "html parser");
1149    }
1150
1151    #[test]
1152    fn test_normalize_single_word() {
1153        assert_eq!(normalize_symbol_name("validate"), "validate");
1154        assert_eq!(normalize_symbol_name("Token"), "token");
1155    }
1156
1157    #[test]
1158    fn test_normalize_empty_and_special() {
1159        assert_eq!(normalize_symbol_name(""), "");
1160        assert_eq!(normalize_symbol_name("_"), "");
1161        assert_eq!(normalize_symbol_name("___"), "");
1162    }
1163
1164    #[test]
1165    fn test_insert_and_query_symbols() {
1166        let db = Database::open_memory().unwrap();
1167        let sym = test_symbol("my_func", SymbolKind::Function, "test.py", 10);
1168        db.insert_symbol(&sym).unwrap();
1169
1170        let outline = db.outline("test.py").unwrap();
1171        assert_eq!(outline.len(), 1);
1172        assert_eq!(outline[0].name, "my_func");
1173    }
1174
1175    #[test]
1176    fn test_optimize_populates_planner_stats() {
1177        // PRAGMA optimize must build sqlite_stat1 once the tables are large
1178        // enough to be worth analyzing — proving the planner has real stats to
1179        // pick join order from (the #110 misplan was a no-stats guess).
1180        let db = Database::open_memory().unwrap();
1181        let syms: Vec<_> = (0..2000)
1182            .map(|i| test_symbol(&format!("f{i}"), SymbolKind::Function, "a.py", i + 1))
1183            .collect();
1184        db.insert_symbols(&syms).unwrap();
1185
1186        db.optimize().unwrap();
1187
1188        let analyzed: i64 = db
1189            .conn
1190            .query_row(
1191                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'sqlite_stat1'",
1192                [],
1193                |row| row.get(0),
1194            )
1195            .unwrap();
1196        assert_eq!(analyzed, 1, "PRAGMA optimize must create sqlite_stat1");
1197    }
1198
1199    #[test]
1200    fn test_optimize_is_safe_on_empty_db() {
1201        let db = Database::open_memory().unwrap();
1202        db.optimize().unwrap(); // no-op, must not error
1203    }
1204
1205    #[test]
1206    fn is_empty_reflects_symbol_presence() {
1207        let db = Database::open_memory().unwrap();
1208        assert!(db.is_empty().unwrap(), "fresh DB should be empty");
1209        db.insert_symbol(&test_symbol("f", SymbolKind::Function, "a.py", 1))
1210            .unwrap();
1211        assert!(!db.is_empty().unwrap(), "DB with a symbol is not empty");
1212    }
1213
1214    #[test]
1215    fn test_insert_and_query_edges() {
1216        let db = Database::open_memory().unwrap();
1217        let caller = test_symbol("caller_fn", SymbolKind::Function, "a.py", 1);
1218        let callee = test_symbol("callee_fn", SymbolKind::Function, "b.py", 1);
1219        db.insert_symbol(&caller).unwrap();
1220        db.insert_symbol(&callee).unwrap();
1221
1222        let edge = Edge {
1223            source_id: caller.id.clone(),
1224            target_name: "callee_fn".to_string(),
1225            target_id: None,
1226            kind: EdgeKind::Calls,
1227            file_path: "a.py".to_string(),
1228            line: 5,
1229            provenance: None,
1230        };
1231        db.insert_edge(&edge).unwrap();
1232
1233        let refs = db.refs("callee_fn", None).unwrap();
1234        assert_eq!(refs.len(), 1);
1235        assert_eq!(refs[0].0.source_id, caller.id);
1236    }
1237
1238    #[test]
1239    fn test_edge_resolution() {
1240        let db = Database::open_memory().unwrap();
1241        let sym_a = test_symbol("process", SymbolKind::Function, "a.py", 1);
1242        let sym_b = test_symbol("helper", SymbolKind::Function, "a.py", 20);
1243        db.insert_symbols(&[sym_a.clone(), sym_b.clone()]).unwrap();
1244
1245        let edge = Edge {
1246            source_id: sym_a.id.clone(),
1247            target_name: "helper".to_string(),
1248            target_id: None,
1249            kind: EdgeKind::Calls,
1250            file_path: "a.py".to_string(),
1251            line: 5,
1252            provenance: None,
1253        };
1254        db.insert_edge(&edge).unwrap();
1255
1256        let resolved = db.resolve_edges().unwrap();
1257        assert_eq!(resolved, 1);
1258    }
1259
1260    #[test]
1261    fn test_stats() {
1262        let db = Database::open_memory().unwrap();
1263        let file = FileInfo {
1264            path: "test.py".to_string(),
1265            last_modified: 0.0,
1266            hash: "abc".to_string(),
1267            language: "python".to_string(),
1268            num_symbols: 2,
1269        };
1270        db.upsert_file(&file).unwrap();
1271        let sym = test_symbol("foo", SymbolKind::Function, "test.py", 1);
1272        db.insert_symbol(&sym).unwrap();
1273
1274        let stats = db.stats().unwrap();
1275        assert_eq!(stats.num_files, 1);
1276        assert_eq!(stats.num_symbols, 1);
1277    }
1278
1279    #[test]
1280    fn savings_breakdown_empty_returns_zero() {
1281        let db = Database::open_memory().unwrap();
1282        let r = db.savings_breakdown().unwrap();
1283        assert_eq!(r.total_queries, 0);
1284        assert_eq!(r.tokens_used_cartog, 0);
1285        assert_eq!(r.tokens_used_grep, 0);
1286        assert_eq!(r.estimated_tokens_saved, 0);
1287        assert_eq!(r.percent_saved, 0);
1288        assert!(r.by_tool.is_empty());
1289        assert!(r.by_source.is_empty());
1290        assert_eq!(r.baseline_delta, TOKENS_SAVED_PER_QUERY);
1291    }
1292
1293    #[test]
1294    fn log_query_persists_rows_aggregated_by_tool_and_source() {
1295        let db = Database::open_memory().unwrap();
1296        db.log_query("search", "cli");
1297        db.log_query("search", "cli");
1298        db.log_query("refs", "cli");
1299        db.log_query("search", "mcp");
1300        db.log_query("impact", "mcp");
1301
1302        let r = db.savings_breakdown().unwrap();
1303        assert_eq!(r.total_queries, 5);
1304        // With/without/saved derived from the per-query constants.
1305        assert_eq!(r.tokens_used_cartog, 5 * TOKENS_PER_QUERY_CARTOG as u64);
1306        assert_eq!(r.tokens_used_grep, 5 * TOKENS_PER_QUERY_GREP as u64);
1307        assert_eq!(r.estimated_tokens_saved, 5 * TOKENS_SAVED_PER_QUERY as u64);
1308        // ~83% saved given 280 vs 1700 baseline.
1309        assert_eq!(r.percent_saved, 83);
1310
1311        // by_tool sorted by count desc, then name
1312        let tool_counts: Vec<_> = r.by_tool.iter().map(|(t, c)| (t.as_str(), *c)).collect();
1313        assert_eq!(tool_counts, vec![("search", 3), ("impact", 1), ("refs", 1)]);
1314
1315        let src_counts: Vec<_> = r.by_source.iter().map(|(s, c)| (s.as_str(), *c)).collect();
1316        assert_eq!(src_counts, vec![("cli", 3), ("mcp", 2)]);
1317    }
1318
1319    #[test]
1320    fn log_query_noop_on_read_only_attach() {
1321        let dir = tempfile::TempDir::new().unwrap();
1322        let db_path = dir.path().join("test.db");
1323        {
1324            let primary = Database::open(&db_path, 384).unwrap();
1325            primary.log_query("search", "cli"); // primary write succeeds
1326        }
1327
1328        let reader = Database::open_readonly(&db_path).unwrap();
1329        assert!(reader.is_read_only());
1330        // log_query on read-only attach must silently no-op (no panic, no insert).
1331        reader.log_query("search", "mcp");
1332        reader.log_query("refs", "mcp");
1333
1334        let r = reader.savings_breakdown().unwrap();
1335        // Only the primary's row is visible — secondary writes were dropped.
1336        assert_eq!(r.total_queries, 1);
1337        assert_eq!(r.by_tool, vec![("search".to_string(), 1)]);
1338    }
1339
1340    #[test]
1341    fn test_resolve_edges_same_dir_priority() {
1342        let db = Database::open_memory().unwrap();
1343
1344        // "helper" exists in same dir (src/utils.py) and elsewhere (lib/utils.py)
1345        let caller = test_symbol("process", SymbolKind::Function, "src/main.py", 1);
1346        let same_dir = test_symbol("helper", SymbolKind::Function, "src/utils.py", 1);
1347        let other_dir = test_symbol("helper", SymbolKind::Function, "lib/utils.py", 1);
1348        db.insert_symbols(&[caller.clone(), same_dir.clone(), other_dir.clone()])
1349            .unwrap();
1350
1351        let edge = Edge {
1352            source_id: caller.id.clone(),
1353            target_name: "helper".to_string(),
1354            target_id: None,
1355            kind: EdgeKind::Calls,
1356            file_path: "src/main.py".to_string(),
1357            line: 5,
1358            provenance: None,
1359        };
1360        db.insert_edge(&edge).unwrap();
1361
1362        let resolved = db.resolve_edges().unwrap();
1363        assert_eq!(resolved, 1);
1364
1365        // Verify it resolved to the same-directory symbol
1366        let refs = db.refs("helper", None).unwrap();
1367        let call_edge = refs
1368            .iter()
1369            .find(|(e, _)| e.kind == EdgeKind::Calls)
1370            .unwrap();
1371        assert_eq!(call_edge.0.target_id.as_ref().unwrap(), &same_dir.id);
1372    }
1373
1374    #[test]
1375    fn test_resolve_edges_ambiguous_no_resolve() {
1376        let db = Database::open_memory().unwrap();
1377
1378        // "helper" in two different directories, caller in a third
1379        let caller = test_symbol("process", SymbolKind::Function, "app/main.py", 1);
1380        let sym1 = test_symbol("helper", SymbolKind::Function, "pkg_a/utils.py", 1);
1381        let sym2 = test_symbol("helper", SymbolKind::Function, "pkg_b/utils.py", 1);
1382        db.insert_symbols(&[caller.clone(), sym1, sym2]).unwrap();
1383
1384        let edge = Edge {
1385            source_id: caller.id.clone(),
1386            target_name: "helper".to_string(),
1387            target_id: None,
1388            kind: EdgeKind::Calls,
1389            file_path: "app/main.py".to_string(),
1390            line: 5,
1391            provenance: None,
1392        };
1393        db.insert_edge(&edge).unwrap();
1394
1395        let resolved = db.resolve_edges().unwrap();
1396        // Should NOT resolve because "helper" is ambiguous (2 matches globally)
1397        assert_eq!(resolved, 0);
1398    }
1399
1400    #[test]
1401    fn test_resolve_edges_same_file_priority() {
1402        let db = Database::open_memory().unwrap();
1403
1404        // "helper" in same file AND in another file
1405        let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
1406        let same_file = test_symbol("helper", SymbolKind::Function, "a.py", 20);
1407        let other_file = test_symbol("helper", SymbolKind::Function, "b.py", 1);
1408        db.insert_symbols(&[caller.clone(), same_file.clone(), other_file])
1409            .unwrap();
1410
1411        let edge = Edge {
1412            source_id: caller.id.clone(),
1413            target_name: "helper".to_string(),
1414            target_id: None,
1415            kind: EdgeKind::Calls,
1416            file_path: "a.py".to_string(),
1417            line: 5,
1418            provenance: None,
1419        };
1420        db.insert_edge(&edge).unwrap();
1421
1422        let resolved = db.resolve_edges().unwrap();
1423        assert_eq!(resolved, 1);
1424
1425        // Verify same-file symbol was chosen
1426        let refs = db.refs("helper", None).unwrap();
1427        let call_edge = refs
1428            .iter()
1429            .find(|(e, _)| e.kind == EdgeKind::Calls)
1430            .unwrap();
1431        assert_eq!(call_edge.0.target_id.as_ref().unwrap(), &same_file.id);
1432    }
1433
1434    #[test]
1435    fn test_resolve_edges_php_fqcn_target_same_file() {
1436        let db = Database::open_memory().unwrap();
1437
1438        // PHP emits namespace-qualified targets: `extends BaseService` inside
1439        // `namespace App\Auth` becomes "App\Auth\BaseService".
1440        let base = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
1441        let child = test_symbol("AuthService", SymbolKind::Class, "auth/service.php", 30);
1442        db.insert_symbols(&[base.clone(), child.clone()]).unwrap();
1443
1444        db.insert_edge(&Edge::new(
1445            &child.id,
1446            "App\\Auth\\BaseService",
1447            EdgeKind::Inherits,
1448            "auth/service.php",
1449            30,
1450        ))
1451        .unwrap();
1452
1453        let resolved = db.resolve_edges().unwrap();
1454        assert_eq!(resolved, 1);
1455
1456        let refs = db.refs("App\\Auth\\BaseService", None).unwrap();
1457        assert_eq!(refs[0].0.target_id.as_ref().unwrap(), &base.id);
1458    }
1459
1460    #[test]
1461    fn test_resolve_edges_php_fqcn_target_prefers_class_over_import_symbol() {
1462        let db = Database::open_memory().unwrap();
1463
1464        let class_sym = test_symbol("AppError", SymbolKind::Class, "exceptions.php", 1);
1465        let child = test_symbol("TokenError", SymbolKind::Class, "auth/tokens.php", 10);
1466        // PHP `use App\AppError;` extracts an Import symbol named by FQCN.
1467        let import_sym = test_symbol("App\\AppError", SymbolKind::Import, "auth/tokens.php", 1);
1468        db.insert_symbols(&[class_sym.clone(), child.clone(), import_sym])
1469            .unwrap();
1470
1471        db.insert_edge(&Edge::new(
1472            &child.id,
1473            "App\\AppError",
1474            EdgeKind::Inherits,
1475            "auth/tokens.php",
1476            10,
1477        ))
1478        .unwrap();
1479
1480        db.resolve_edges().unwrap();
1481
1482        let refs = db.refs("App\\AppError", None).unwrap();
1483        let inherits = refs
1484            .iter()
1485            .find(|(e, _)| e.kind == EdgeKind::Inherits)
1486            .unwrap();
1487        assert_eq!(inherits.0.target_id.as_ref().unwrap(), &class_sym.id);
1488    }
1489
1490    #[test]
1491    fn test_hierarchy_finds_children_of_fqcn_resolved_target() {
1492        let db = Database::open_memory().unwrap();
1493
1494        let base = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
1495        let child = test_symbol(
1496            "PaymentProcessor",
1497            SymbolKind::Class,
1498            "services/payment.php",
1499            5,
1500        );
1501        db.insert_symbols(&[base.clone(), child.clone()]).unwrap();
1502
1503        db.insert_edge(&Edge::new(
1504            &child.id,
1505            "App\\Auth\\BaseService",
1506            EdgeKind::Inherits,
1507            "services/payment.php",
1508            5,
1509        ))
1510        .unwrap();
1511        db.resolve_edges().unwrap();
1512
1513        let pairs = db.hierarchy("BaseService").unwrap();
1514        assert_eq!(
1515            pairs,
1516            vec![("PaymentProcessor".to_string(), "BaseService".to_string())]
1517        );
1518    }
1519
1520    #[test]
1521    fn test_resolve_edges_class_over_constructor() {
1522        let db = Database::open_memory().unwrap();
1523
1524        // Java pattern: Logger class + Logger() constructor method in same file
1525        let caller = test_symbol("handleLogin", SymbolKind::Method, "auth/Service.java", 10);
1526        let logger_class = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
1527        let logger_ctor = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
1528        db.insert_symbols(&[caller.clone(), logger_class.clone(), logger_ctor])
1529            .unwrap();
1530
1531        let edge = Edge {
1532            source_id: caller.id.clone(),
1533            target_name: "Logger".to_string(),
1534            target_id: None,
1535            kind: EdgeKind::References,
1536            file_path: "auth/Service.java".to_string(),
1537            line: 12,
1538            provenance: None,
1539        };
1540        db.insert_edge(&edge).unwrap();
1541
1542        let resolved = db.resolve_edges().unwrap();
1543        assert_eq!(resolved, 1);
1544
1545        let refs = db.refs("Logger", None).unwrap();
1546        let ref_edge = refs
1547            .iter()
1548            .find(|(e, _)| e.kind == EdgeKind::References)
1549            .unwrap();
1550        assert_eq!(ref_edge.0.target_id.as_ref().unwrap(), &logger_class.id);
1551    }
1552
1553    #[test]
1554    fn test_resolve_edges_class_over_constructor_still_ambiguous_with_three() {
1555        let db = Database::open_memory().unwrap();
1556
1557        // Three matches: class + ctor + function — should NOT resolve
1558        let caller = test_symbol("main", SymbolKind::Function, "app.java", 1);
1559        let sym_class = test_symbol("Foo", SymbolKind::Class, "a/Foo.java", 1);
1560        let sym_ctor = test_symbol("Foo", SymbolKind::Method, "a/Foo.java", 5);
1561        let sym_func = test_symbol("Foo", SymbolKind::Function, "b/Foo.java", 1);
1562        db.insert_symbols(&[caller.clone(), sym_class, sym_ctor, sym_func])
1563            .unwrap();
1564
1565        let edge = Edge {
1566            source_id: caller.id.clone(),
1567            target_name: "Foo".to_string(),
1568            target_id: None,
1569            kind: EdgeKind::Calls,
1570            file_path: "app.java".to_string(),
1571            line: 5,
1572            provenance: None,
1573        };
1574        db.insert_edge(&edge).unwrap();
1575
1576        let resolved = db.resolve_edges().unwrap();
1577        assert_eq!(resolved, 0);
1578    }
1579
1580    #[test]
1581    fn test_resolve_edges_multipass_import_then_call() {
1582        let db = Database::open_memory().unwrap();
1583
1584        // File auth/service.java imports Logger from util/Logger.java
1585        // and also calls Logger.info() — a reference to Logger
1586        let import_sym = test_symbol("util.Logger", SymbolKind::Import, "auth/service.java", 1);
1587        let caller = test_symbol("authenticate", SymbolKind::Method, "auth/service.java", 10);
1588        let logger_class = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
1589        let logger_ctor = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
1590        db.insert_symbols(&[
1591            import_sym.clone(),
1592            caller.clone(),
1593            logger_class.clone(),
1594            logger_ctor,
1595        ])
1596        .unwrap();
1597
1598        // Import edge: auth/service.java imports "Logger"
1599        let import_edge = Edge {
1600            source_id: import_sym.id.clone(),
1601            target_name: "Logger".to_string(),
1602            target_id: None,
1603            kind: EdgeKind::Imports,
1604            file_path: "auth/service.java".to_string(),
1605            line: 1,
1606            provenance: None,
1607        };
1608        db.insert_edge(&import_edge).unwrap();
1609
1610        // Reference edge: authenticate() references Logger
1611        let ref_edge = Edge {
1612            source_id: caller.id.clone(),
1613            target_name: "Logger".to_string(),
1614            target_id: None,
1615            kind: EdgeKind::References,
1616            file_path: "auth/service.java".to_string(),
1617            line: 15,
1618            provenance: None,
1619        };
1620        db.insert_edge(&ref_edge).unwrap();
1621
1622        let resolved = db.resolve_edges().unwrap();
1623        // Pass 1: import edge resolves via tier 6 (class over ctor)
1624        // Pass 2: reference edge resolves via tier 2 (import-path)
1625        assert_eq!(resolved, 2);
1626
1627        let refs = db.refs("Logger", None).unwrap();
1628        let reference = refs
1629            .iter()
1630            .find(|(e, _)| e.kind == EdgeKind::References)
1631            .unwrap();
1632        assert_eq!(reference.0.target_id.as_ref().unwrap(), &logger_class.id);
1633    }
1634
1635    #[test]
1636    fn test_resolve_edges_function_over_method() {
1637        let db = Database::open_memory().unwrap();
1638
1639        // Ruby pattern: get_logger as top-level function AND as module method
1640        let caller = test_symbol("process", SymbolKind::Function, "app/main.rb", 1);
1641        let top_fn = test_symbol("get_logger", SymbolKind::Function, "utils/helpers.rb", 6);
1642        let mod_method = test_symbol("get_logger", SymbolKind::Method, "utils/logging.rb", 6);
1643        db.insert_symbols(&[caller.clone(), top_fn.clone(), mod_method])
1644            .unwrap();
1645
1646        let edge = Edge {
1647            source_id: caller.id.clone(),
1648            target_name: "get_logger".to_string(),
1649            target_id: None,
1650            kind: EdgeKind::Calls,
1651            file_path: "app/main.rb".to_string(),
1652            line: 5,
1653            provenance: None,
1654        };
1655        db.insert_edge(&edge).unwrap();
1656
1657        let resolved = db.resolve_edges().unwrap();
1658        assert_eq!(resolved, 1);
1659
1660        let refs = db.refs("get_logger", None).unwrap();
1661        let call_edge = refs
1662            .iter()
1663            .find(|(e, _)| e.kind == EdgeKind::Calls)
1664            .unwrap();
1665        assert_eq!(call_edge.0.target_id.as_ref().unwrap(), &top_fn.id);
1666    }
1667
1668    #[test]
1669    fn test_resolve_edges_two_functions_still_ambiguous() {
1670        let db = Database::open_memory().unwrap();
1671
1672        // Two functions with same name in different files — should NOT resolve
1673        let caller = test_symbol("main", SymbolKind::Function, "app.rb", 1);
1674        let fn1 = test_symbol("helper", SymbolKind::Function, "a/utils.rb", 1);
1675        let fn2 = test_symbol("helper", SymbolKind::Function, "b/utils.rb", 1);
1676        db.insert_symbols(&[caller.clone(), fn1, fn2]).unwrap();
1677
1678        let edge = Edge {
1679            source_id: caller.id.clone(),
1680            target_name: "helper".to_string(),
1681            target_id: None,
1682            kind: EdgeKind::Calls,
1683            file_path: "app.rb".to_string(),
1684            line: 5,
1685            provenance: None,
1686        };
1687        db.insert_edge(&edge).unwrap();
1688
1689        let resolved = db.resolve_edges().unwrap();
1690        assert_eq!(resolved, 0);
1691    }
1692
1693    #[test]
1694    fn test_callees_query() {
1695        let db = Database::open_memory().unwrap();
1696
1697        let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
1698        let callee1 = test_symbol("fetch", SymbolKind::Function, "b.py", 1);
1699        let callee2 = test_symbol("save", SymbolKind::Function, "c.py", 1);
1700        db.insert_symbols(&[caller.clone(), callee1, callee2])
1701            .unwrap();
1702
1703        db.insert_edges(&[
1704            Edge {
1705                source_id: caller.id.clone(),
1706                target_name: "fetch".to_string(),
1707                target_id: None,
1708                kind: EdgeKind::Calls,
1709                file_path: "a.py".to_string(),
1710                line: 5,
1711                provenance: None,
1712            },
1713            Edge {
1714                source_id: caller.id.clone(),
1715                target_name: "save".to_string(),
1716                target_id: None,
1717                kind: EdgeKind::Calls,
1718                file_path: "a.py".to_string(),
1719                line: 6,
1720                provenance: None,
1721            },
1722        ])
1723        .unwrap();
1724
1725        let callees = db.callees("process").unwrap();
1726        assert_eq!(callees.len(), 2);
1727        let targets: Vec<&str> = callees.iter().map(|e| e.target_name.as_str()).collect();
1728        assert!(targets.contains(&"fetch"));
1729        assert!(targets.contains(&"save"));
1730    }
1731
1732    #[test]
1733    fn test_impact_transitive() {
1734        let db = Database::open_memory().unwrap();
1735
1736        let a = test_symbol("a", SymbolKind::Function, "a.py", 1);
1737        let b = test_symbol("b", SymbolKind::Function, "b.py", 1);
1738        let c = test_symbol("c", SymbolKind::Function, "c.py", 1);
1739        db.insert_symbols(&[a.clone(), b.clone(), c.clone()])
1740            .unwrap();
1741
1742        // b calls a, c calls b
1743        db.insert_edges(&[
1744            Edge {
1745                source_id: b.id.clone(),
1746                target_name: "a".to_string(),
1747                target_id: Some(a.id.clone()),
1748                kind: EdgeKind::Calls,
1749                file_path: "b.py".to_string(),
1750                line: 5,
1751                provenance: None,
1752            },
1753            Edge {
1754                source_id: c.id.clone(),
1755                target_name: "b".to_string(),
1756                target_id: Some(b.id.clone()),
1757                kind: EdgeKind::Calls,
1758                file_path: "c.py".to_string(),
1759                line: 5,
1760                provenance: None,
1761            },
1762        ])
1763        .unwrap();
1764
1765        // Impact of "a" with depth 2 should find b (depth 1) and c (depth 2)
1766        let results = db.impact("a", 2).unwrap();
1767        assert_eq!(results.len(), 2);
1768        assert_eq!(results[0].1, 1); // first hop
1769        assert_eq!(results[1].1, 2); // second hop
1770    }
1771
1772    #[test]
1773    fn test_impact_depth_zero_returns_empty() {
1774        let db = Database::open_memory().unwrap();
1775        let a = test_symbol("a", SymbolKind::Function, "a.py", 1);
1776        db.insert_symbols(&[a]).unwrap();
1777        assert!(db.impact("a", 0).unwrap().is_empty());
1778    }
1779
1780    #[test]
1781    fn test_impact_cycle_terminates() {
1782        // Cycle: a → b → a. impact("a", 3) must not loop forever.
1783        let db = Database::open_memory().unwrap();
1784        let a = test_symbol("a", SymbolKind::Function, "a.py", 1);
1785        let b = test_symbol("b", SymbolKind::Function, "b.py", 1);
1786        db.insert_symbols(&[a.clone(), b.clone()]).unwrap();
1787        db.insert_edges(&[
1788            Edge {
1789                source_id: a.id.clone(),
1790                target_name: "b".to_string(),
1791                target_id: Some(b.id.clone()),
1792                kind: EdgeKind::Calls,
1793                file_path: "a.py".to_string(),
1794                line: 2,
1795                provenance: None,
1796            },
1797            Edge {
1798                source_id: b.id.clone(),
1799                target_name: "a".to_string(),
1800                target_id: Some(a.id.clone()),
1801                kind: EdgeKind::Calls,
1802                file_path: "b.py".to_string(),
1803                line: 2,
1804                provenance: None,
1805            },
1806        ])
1807        .unwrap();
1808
1809        // Each of the two edges is returned once, labeled with its shallowest depth.
1810        let results = db.impact("a", 5).unwrap();
1811        assert_eq!(results.len(), 2);
1812        for (_, depth) in &results {
1813            assert!(*depth >= 1 && *depth <= 5);
1814        }
1815    }
1816
1817    #[test]
1818    fn test_impact_fanout_dedupes_by_edge() {
1819        // Two callers of `shared`, each also calling each other → diamond.
1820        // Each edge should appear once.
1821        let db = Database::open_memory().unwrap();
1822        let shared = test_symbol("shared", SymbolKind::Function, "s.py", 1);
1823        let x = test_symbol("x", SymbolKind::Function, "x.py", 1);
1824        let y = test_symbol("y", SymbolKind::Function, "y.py", 1);
1825        db.insert_symbols(&[shared.clone(), x.clone(), y.clone()])
1826            .unwrap();
1827        db.insert_edges(&[
1828            Edge {
1829                source_id: x.id.clone(),
1830                target_name: "shared".to_string(),
1831                target_id: Some(shared.id.clone()),
1832                kind: EdgeKind::Calls,
1833                file_path: "x.py".to_string(),
1834                line: 1,
1835                provenance: None,
1836            },
1837            Edge {
1838                source_id: y.id.clone(),
1839                target_name: "shared".to_string(),
1840                target_id: Some(shared.id.clone()),
1841                kind: EdgeKind::Calls,
1842                file_path: "y.py".to_string(),
1843                line: 1,
1844                provenance: None,
1845            },
1846            Edge {
1847                source_id: y.id.clone(),
1848                target_name: "x".to_string(),
1849                target_id: Some(x.id.clone()),
1850                kind: EdgeKind::Calls,
1851                file_path: "y.py".to_string(),
1852                line: 2,
1853                provenance: None,
1854            },
1855        ])
1856        .unwrap();
1857
1858        let results = db.impact("shared", 3).unwrap();
1859        // 3 distinct edges, each reported exactly once.
1860        assert_eq!(results.len(), 3);
1861    }
1862
1863    /// Build `a → b → c → d` over `calls` edges for trace tests.
1864    fn chain_db() -> Database {
1865        let db = Database::open_memory().unwrap();
1866        let names = ["a", "b", "c", "d"];
1867        let syms: Vec<Symbol> = names
1868            .iter()
1869            .map(|n| test_symbol(n, SymbolKind::Function, &format!("{n}.py"), 1))
1870            .collect();
1871        db.insert_symbols(&syms).unwrap();
1872        let edges: Vec<Edge> = syms
1873            .windows(2)
1874            .map(|w| Edge {
1875                source_id: w[0].id.clone(),
1876                target_name: w[1].name.clone(),
1877                target_id: Some(w[1].id.clone()),
1878                kind: EdgeKind::Calls,
1879                file_path: w[0].file_path.clone(),
1880                line: 2,
1881                provenance: None,
1882            })
1883            .collect();
1884        db.insert_edges(&edges).unwrap();
1885        db
1886    }
1887
1888    #[test]
1889    fn trace_returns_shortest_path_in_order() {
1890        let db = chain_db();
1891        let hops = db.trace("a", "d", 8).unwrap().expect("path a→d exists");
1892        let names: Vec<&str> = hops.iter().map(|h| h.source_name.as_str()).collect();
1893        assert_eq!(names, ["a", "b", "c"]);
1894        assert_eq!(hops.last().unwrap().target_name, "d");
1895    }
1896
1897    #[test]
1898    fn trace_returns_none_when_unreachable() {
1899        let db = chain_db();
1900        assert!(db.trace("d", "a", 8).unwrap().is_none());
1901    }
1902
1903    #[test]
1904    fn trace_same_symbol_is_empty_path() {
1905        let db = chain_db();
1906        assert_eq!(db.trace("a", "a", 8).unwrap(), Some(Vec::new()));
1907    }
1908
1909    #[test]
1910    fn trace_respects_depth_limit() {
1911        let db = chain_db();
1912        // a→d is 3 hops; depth 2 cannot reach it.
1913        assert!(db.trace("a", "d", 2).unwrap().is_none());
1914    }
1915
1916    #[test]
1917    fn trace_terminates_on_cycle() {
1918        // a → b → a. trace("a","b") returns the single hop without looping.
1919        let db = Database::open_memory().unwrap();
1920        let a = test_symbol("a", SymbolKind::Function, "a.py", 1);
1921        let b = test_symbol("b", SymbolKind::Function, "b.py", 1);
1922        db.insert_symbols(&[a.clone(), b.clone()]).unwrap();
1923        db.insert_edges(&[
1924            Edge {
1925                source_id: a.id.clone(),
1926                target_name: "b".to_string(),
1927                target_id: Some(b.id.clone()),
1928                kind: EdgeKind::Calls,
1929                file_path: "a.py".to_string(),
1930                line: 2,
1931                provenance: None,
1932            },
1933            Edge {
1934                source_id: b.id.clone(),
1935                target_name: "a".to_string(),
1936                target_id: Some(a.id.clone()),
1937                kind: EdgeKind::Calls,
1938                file_path: "b.py".to_string(),
1939                line: 2,
1940                provenance: None,
1941            },
1942        ])
1943        .unwrap();
1944        let hops = db.trace("a", "b", 8).unwrap().expect("a→b exists");
1945        assert_eq!(hops.len(), 1);
1946    }
1947
1948    #[test]
1949    fn trace_dense_cycle_does_not_loop_and_finds_target() {
1950        // A fully-connected clique a,b,c,d (every node calls every other).
1951        // Without the visited-set guard, BFS would re-expand nodes endlessly /
1952        // explode; with it, each node is expanded once and the path to the
1953        // target is the direct 1-hop edge.
1954        let db = Database::open_memory().unwrap();
1955        let names = ["a", "b", "c", "d"];
1956        let syms: Vec<Symbol> = names
1957            .iter()
1958            .map(|n| test_symbol(n, SymbolKind::Function, &format!("{n}.py"), 1))
1959            .collect();
1960        db.insert_symbols(&syms).unwrap();
1961        let mut edges = Vec::new();
1962        for src in &syms {
1963            for tgt in &syms {
1964                if src.id != tgt.id {
1965                    edges.push(Edge {
1966                        source_id: src.id.clone(),
1967                        target_name: tgt.name.clone(),
1968                        target_id: Some(tgt.id.clone()),
1969                        kind: EdgeKind::Calls,
1970                        file_path: src.file_path.clone(),
1971                        line: 2,
1972                        provenance: None,
1973                    });
1974                }
1975            }
1976        }
1977        db.insert_edges(&edges).unwrap();
1978        // Direct edge a→d exists, so the shortest path is exactly one hop.
1979        let hops = db.trace("a", "d", 20).unwrap().expect("a reaches d");
1980        assert_eq!(hops.len(), 1, "shortest path in a clique is one hop");
1981        assert_eq!(hops[0].source_name, "a");
1982        assert_eq!(hops[0].target_name, "d");
1983    }
1984
1985    #[test]
1986    fn trace_unaffected_by_comma_in_symbol_ids() {
1987        // File paths (hence symbol ids) containing commas must not corrupt the
1988        // cycle guard — visited tracking is on exact ids, not a delimited string.
1989        let db = Database::open_memory().unwrap();
1990        let a = test_symbol("a", SymbolKind::Function, "a,b.py", 1);
1991        let b = test_symbol("b", SymbolKind::Function, "c,d.py", 1);
1992        let c = test_symbol("c", SymbolKind::Function, "e,f.py", 1);
1993        db.insert_symbols(&[a.clone(), b.clone(), c.clone()])
1994            .unwrap();
1995        db.insert_edges(&[
1996            Edge {
1997                source_id: a.id.clone(),
1998                target_name: "b".to_string(),
1999                target_id: Some(b.id.clone()),
2000                kind: EdgeKind::Calls,
2001                file_path: a.file_path.clone(),
2002                line: 2,
2003                provenance: None,
2004            },
2005            Edge {
2006                source_id: b.id.clone(),
2007                target_name: "c".to_string(),
2008                target_id: Some(c.id.clone()),
2009                kind: EdgeKind::Calls,
2010                file_path: b.file_path.clone(),
2011                line: 2,
2012                provenance: None,
2013            },
2014        ])
2015        .unwrap();
2016        let hops = db
2017            .trace("a", "c", 8)
2018            .unwrap()
2019            .expect("a→b→c despite commas");
2020        assert_eq!(hops.len(), 2);
2021        assert_eq!(hops[0].source_id, a.id);
2022        assert_eq!(hops[1].source_id, b.id);
2023    }
2024
2025    #[test]
2026    fn trace_hop_carries_exact_source_id_for_overloaded_name() {
2027        // Two symbols share the name `helper` in the same file; the hop must
2028        // carry the id of the symbol actually on the path, not a name lookup.
2029        let db = Database::open_memory().unwrap();
2030        let caller = test_symbol("caller", SymbolKind::Function, "m.py", 1);
2031        let h1 = Symbol::new("helper", SymbolKind::Function, "m.py", 10, 12, 0, 5, None);
2032        let h2 = Symbol::new("helper", SymbolKind::Method, "m.py", 20, 22, 6, 11, None);
2033        db.insert_symbols(&[caller.clone(), h1.clone(), h2.clone()])
2034            .unwrap();
2035        // caller → the method overload (h2) specifically (resolved target_id).
2036        db.insert_edges(&[Edge {
2037            source_id: caller.id.clone(),
2038            target_name: "helper".to_string(),
2039            target_id: Some(h2.id.clone()),
2040            kind: EdgeKind::Calls,
2041            file_path: caller.file_path.clone(),
2042            line: 2,
2043            provenance: None,
2044        }])
2045        .unwrap();
2046        let hops = db
2047            .trace("caller", "helper", 8)
2048            .unwrap()
2049            .expect("caller→helper");
2050        assert_eq!(hops.len(), 1);
2051        assert_eq!(hops[0].source_id, caller.id, "hop names the exact source");
2052    }
2053
2054    #[test]
2055    fn test_hierarchy_query() {
2056        let db = Database::open_memory().unwrap();
2057
2058        let parent = test_symbol("Animal", SymbolKind::Class, "a.py", 1);
2059        let child = test_symbol("Dog", SymbolKind::Class, "a.py", 10);
2060        db.insert_symbols(&[parent, child.clone()]).unwrap();
2061
2062        db.insert_edge(&Edge {
2063            source_id: child.id.clone(),
2064            target_name: "Animal".to_string(),
2065            target_id: None,
2066            kind: EdgeKind::Inherits,
2067            file_path: "a.py".to_string(),
2068            line: 10,
2069            provenance: None,
2070        })
2071        .unwrap();
2072
2073        let pairs = db.hierarchy("Dog").unwrap();
2074        assert_eq!(pairs.len(), 1);
2075        assert_eq!(pairs[0].0, "Dog");
2076        assert_eq!(pairs[0].1, "Animal");
2077    }
2078
2079    #[test]
2080    fn test_file_deps_query() {
2081        let db = Database::open_memory().unwrap();
2082
2083        let import_sym = test_symbol("os", SymbolKind::Import, "main.py", 1);
2084        db.insert_symbol(&import_sym).unwrap();
2085
2086        db.insert_edge(&Edge {
2087            source_id: import_sym.id.clone(),
2088            target_name: "os".to_string(),
2089            target_id: None,
2090            kind: EdgeKind::Imports,
2091            file_path: "main.py".to_string(),
2092            line: 1,
2093            provenance: None,
2094        })
2095        .unwrap();
2096
2097        let deps = db.file_deps("main.py").unwrap();
2098        assert_eq!(deps.len(), 1);
2099        assert_eq!(deps[0].target_name, "os");
2100    }
2101
2102    #[test]
2103    fn test_remove_file_clears_all_data() {
2104        let db = Database::open_memory().unwrap();
2105
2106        let sym = test_symbol("foo", SymbolKind::Function, "test.py", 1);
2107        db.insert_symbol(&sym).unwrap();
2108        db.insert_edge(&Edge {
2109            source_id: sym.id.clone(),
2110            target_name: "bar".to_string(),
2111            target_id: None,
2112            kind: EdgeKind::Calls,
2113            file_path: "test.py".to_string(),
2114            line: 5,
2115            provenance: None,
2116        })
2117        .unwrap();
2118        db.upsert_file(&FileInfo {
2119            path: "test.py".to_string(),
2120            last_modified: 0.0,
2121            hash: "abc".to_string(),
2122            language: "python".to_string(),
2123            num_symbols: 1,
2124        })
2125        .unwrap();
2126
2127        db.remove_file("test.py").unwrap();
2128
2129        assert!(db.outline("test.py").unwrap().is_empty());
2130        assert!(db.get_file("test.py").unwrap().is_none());
2131    }
2132
2133    #[test]
2134    fn test_refs_with_kind_filter() {
2135        let db = Database::open_memory().unwrap();
2136        let parent = test_symbol("AuthService", SymbolKind::Class, "a.py", 1);
2137        let child = test_symbol("AdminService", SymbolKind::Class, "a.py", 20);
2138        let caller = test_symbol("login", SymbolKind::Function, "b.py", 1);
2139        db.insert_symbols(&[parent.clone(), child.clone(), caller.clone()])
2140            .unwrap();
2141
2142        db.insert_edges(&[
2143            Edge {
2144                source_id: child.id.clone(),
2145                target_name: "AuthService".to_string(),
2146                target_id: None,
2147                kind: EdgeKind::Inherits,
2148                file_path: "a.py".to_string(),
2149                line: 20,
2150                provenance: None,
2151            },
2152            Edge {
2153                source_id: caller.id.clone(),
2154                target_name: "AuthService".to_string(),
2155                target_id: None,
2156                kind: EdgeKind::Calls,
2157                file_path: "b.py".to_string(),
2158                line: 5,
2159                provenance: None,
2160            },
2161        ])
2162        .unwrap();
2163
2164        // No filter → both edges
2165        let all = db.refs("AuthService", None).unwrap();
2166        assert_eq!(all.len(), 2);
2167
2168        // Filter inherits only
2169        let inherits = db.refs("AuthService", Some(EdgeKind::Inherits)).unwrap();
2170        assert_eq!(inherits.len(), 1);
2171        assert_eq!(inherits[0].0.kind, EdgeKind::Inherits);
2172
2173        // Filter calls only
2174        let calls = db.refs("AuthService", Some(EdgeKind::Calls)).unwrap();
2175        assert_eq!(calls.len(), 1);
2176        assert_eq!(calls[0].0.kind, EdgeKind::Calls);
2177
2178        // Filter with no matches
2179        let raises = db.refs("AuthService", Some(EdgeKind::Raises)).unwrap();
2180        assert!(raises.is_empty());
2181    }
2182
2183    #[test]
2184    fn test_refs_matches_via_resolved_target_id_short_name() {
2185        // The edge's literal target_name is qualified (never equals the short
2186        // name), so it only matches `refs("BaseService")` through its resolved
2187        // target_id → a symbol named "BaseService". Guards the `target_id IN
2188        // (SELECT id ... WHERE name = ?)` arm of refs() against regressing to a
2189        // plain `target_name = ?` match. Mirrors the kind-filtered branch too.
2190        let db = Database::open_memory().unwrap();
2191        let base = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
2192        let child = test_symbol("AuthService", SymbolKind::Class, "auth/service.php", 30);
2193        db.insert_symbols(&[base.clone(), child.clone()]).unwrap();
2194        db.insert_edge(&Edge::new(
2195            &child.id,
2196            "App\\Auth\\BaseService",
2197            EdgeKind::Inherits,
2198            "auth/service.php",
2199            30,
2200        ))
2201        .unwrap();
2202        db.resolve_edges().unwrap();
2203
2204        // Short name finds the edge only via the resolved target_id arm.
2205        let by_short = db.refs("BaseService", None).unwrap();
2206        assert_eq!(by_short.len(), 1, "short name must match via target_id");
2207        assert_eq!(by_short[0].0.target_id.as_ref().unwrap(), &base.id);
2208
2209        // Same through the kind-filtered branch.
2210        let by_short_kind = db.refs("BaseService", Some(EdgeKind::Inherits)).unwrap();
2211        assert_eq!(by_short_kind.len(), 1);
2212
2213        // A non-matching kind filter still excludes it.
2214        assert!(db
2215            .refs("BaseService", Some(EdgeKind::Calls))
2216            .unwrap()
2217            .is_empty());
2218    }
2219
2220    #[test]
2221    fn test_search_exact_match_ranks_first() {
2222        let db = Database::open_memory().unwrap();
2223        let exact = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
2224        let prefix = test_symbol("parse_config_file", SymbolKind::Function, "a.py", 10);
2225        let substr = test_symbol("get_parse_config", SymbolKind::Function, "a.py", 20);
2226        db.insert_symbols(&[exact.clone(), prefix, substr]).unwrap();
2227
2228        let results = db.search("parse_config", None, None, 20).unwrap();
2229        assert_eq!(results.len(), 3);
2230        assert_eq!(results[0].name, "parse_config");
2231    }
2232
2233    #[test]
2234    fn test_search_definitions_outrank_variables() {
2235        let db = Database::open_memory().unwrap();
2236        // Variables with exact match on "token"
2237        let var1 = test_symbol("token", SymbolKind::Variable, "routes/auth.ts", 20);
2238        let var2 = test_symbol("token", SymbolKind::Variable, "routes/admin.ts", 11);
2239        // Class with prefix match
2240        let class = test_symbol("TokenError", SymbolKind::Class, "auth/tokens.ts", 14);
2241        // Function with substring match
2242        let func = test_symbol("validateToken", SymbolKind::Function, "auth/tokens.ts", 59);
2243        // Class with substring match
2244        let subclass = test_symbol("ExpiredTokenError", SymbolKind::Class, "auth/tokens.ts", 22);
2245        db.insert_symbols(&[var1, var2, class, func, subclass])
2246            .unwrap();
2247
2248        let results = db.search("token", None, None, 20).unwrap();
2249        assert_eq!(results.len(), 5);
2250        // Definitions (class, function) should all rank above variables
2251        let def_names: Vec<&str> = results[..3].iter().map(|s| s.name.as_str()).collect();
2252        assert!(def_names.contains(&"TokenError"));
2253        assert!(def_names.contains(&"validateToken"));
2254        assert!(def_names.contains(&"ExpiredTokenError"));
2255        // Variables should be last
2256        assert_eq!(results[3].name, "token");
2257        assert_eq!(results[4].name, "token");
2258    }
2259
2260    #[test]
2261    fn test_search_prefix_match() {
2262        let db = Database::open_memory().unwrap();
2263        let a = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
2264        let b = test_symbol("parse_args", SymbolKind::Function, "a.py", 10);
2265        let c = test_symbol("unrelated", SymbolKind::Function, "a.py", 20);
2266        db.insert_symbols(&[a, b, c]).unwrap();
2267
2268        let results = db.search("parse", None, None, 20).unwrap();
2269        assert_eq!(results.len(), 2);
2270        let names: Vec<&str> = results.iter().map(|s| s.name.as_str()).collect();
2271        assert!(names.contains(&"parse_config"));
2272        assert!(names.contains(&"parse_args"));
2273    }
2274
2275    #[test]
2276    fn test_search_substring_match() {
2277        let db = Database::open_memory().unwrap();
2278        let a = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
2279        let b = test_symbol("get_config", SymbolKind::Function, "a.py", 10);
2280        let c = test_symbol("unrelated", SymbolKind::Function, "a.py", 20);
2281        db.insert_symbols(&[a, b, c]).unwrap();
2282
2283        let results = db.search("config", None, None, 20).unwrap();
2284        assert_eq!(results.len(), 2);
2285        let names: Vec<&str> = results.iter().map(|s| s.name.as_str()).collect();
2286        assert!(names.contains(&"parse_config"));
2287        assert!(names.contains(&"get_config"));
2288    }
2289
2290    #[test]
2291    fn test_search_case_insensitive() {
2292        let db = Database::open_memory().unwrap();
2293        let sym = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
2294        db.insert_symbol(&sym).unwrap();
2295
2296        let results = db.search("Parse", None, None, 20).unwrap();
2297        assert_eq!(results.len(), 1);
2298        assert_eq!(results[0].name, "parse_config");
2299    }
2300
2301    #[test]
2302    fn test_search_kind_filter() {
2303        let db = Database::open_memory().unwrap();
2304        let func = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
2305        let class = test_symbol("parse_result", SymbolKind::Class, "a.py", 10);
2306        db.insert_symbols(&[func, class]).unwrap();
2307
2308        let results = db
2309            .search("parse", Some(SymbolKind::Function), None, 20)
2310            .unwrap();
2311        assert_eq!(results.len(), 1);
2312        assert_eq!(results[0].kind, SymbolKind::Function);
2313    }
2314
2315    #[test]
2316    fn test_search_file_filter() {
2317        let db = Database::open_memory().unwrap();
2318        let a = test_symbol("parse_config", SymbolKind::Function, "src/a.rs", 1);
2319        let b = test_symbol("parse_config", SymbolKind::Function, "src/b.rs", 1);
2320        db.insert_symbols(&[a, b]).unwrap();
2321
2322        let results = db.search("parse", None, Some("src/a.rs"), 20).unwrap();
2323        assert_eq!(results.len(), 1);
2324        assert_eq!(results[0].file_path, "src/a.rs");
2325    }
2326
2327    #[test]
2328    fn test_search_empty_query_returns_error() {
2329        let db = Database::open_memory().unwrap();
2330        let err = db.search("", None, None, 20).unwrap_err();
2331        assert!(err.to_string().contains("cannot be empty"));
2332    }
2333
2334    #[test]
2335    fn test_search_zero_limit_returns_error() {
2336        let db = Database::open_memory().unwrap();
2337        let err = db.search("parse", None, None, 0).unwrap_err();
2338        assert!(err.to_string().contains("at least 1"));
2339    }
2340
2341    #[test]
2342    fn test_search_limit_caps_results() {
2343        let db = Database::open_memory().unwrap();
2344        // Insert 5 symbols all matching "fn"
2345        for i in 0..5u32 {
2346            let sym = test_symbol(&format!("fn_{i}"), SymbolKind::Function, "a.py", i * 10 + 1);
2347            db.insert_symbol(&sym).unwrap();
2348        }
2349        let results = db.search("fn", None, None, 3).unwrap();
2350        assert_eq!(results.len(), 3);
2351    }
2352
2353    #[test]
2354    fn test_search_limit_one_returns_top_ranked() {
2355        let db = Database::open_memory().unwrap();
2356        let exact = test_symbol("resolve", SymbolKind::Function, "a.py", 1);
2357        let prefix = test_symbol("resolve_edges", SymbolKind::Function, "a.py", 10);
2358        db.insert_symbols(&[exact, prefix]).unwrap();
2359
2360        let results = db.search("resolve", None, None, 1).unwrap();
2361        assert_eq!(results.len(), 1);
2362        assert_eq!(results[0].name, "resolve");
2363    }
2364
2365    #[test]
2366    fn test_search_wildcard_chars_treated_as_literals() {
2367        let db = Database::open_memory().unwrap();
2368        let sym = test_symbol("get_foo", SymbolKind::Function, "a.py", 1);
2369        let unrelated = test_symbol("getXfoo", SymbolKind::Function, "a.py", 10);
2370        db.insert_symbols(&[sym, unrelated]).unwrap();
2371
2372        // "get_foo" with literal underscore should NOT match "getXfoo"
2373        let results = db.search("get_foo", None, None, 20).unwrap();
2374        assert_eq!(results.len(), 1);
2375        assert_eq!(results[0].name, "get_foo");
2376    }
2377
2378    #[test]
2379    fn test_search_percent_treated_as_literal() {
2380        let db = Database::open_memory().unwrap();
2381        // No symbol contains a literal %, so searching for "%" should return empty
2382        let sym = test_symbol("get_config", SymbolKind::Function, "a.py", 1);
2383        db.insert_symbol(&sym).unwrap();
2384
2385        let results = db.search("%", None, None, 20).unwrap();
2386        assert!(results.is_empty(), "% should not act as a wildcard");
2387    }
2388
2389    // ── RAG: Symbol Content Tests ──
2390
2391    #[test]
2392    fn test_upsert_and_get_symbol_content() {
2393        let db = Database::open_memory().unwrap();
2394        let sym = test_symbol("my_func", SymbolKind::Function, "a.py", 1);
2395        db.insert_symbol(&sym).unwrap();
2396
2397        db.upsert_symbol_content(
2398            &sym.id,
2399            "my_func",
2400            "def my_func(): pass",
2401            "// File: a.py\n// Type: function\n// Name: my_func",
2402        )
2403        .unwrap();
2404
2405        let result = db.get_symbol_content(&sym.id).unwrap();
2406        assert!(result.is_some());
2407        let (content, header) = result.unwrap();
2408        assert_eq!(content, "def my_func(): pass");
2409        assert!(header.contains("my_func"));
2410    }
2411
2412    #[test]
2413    fn test_insert_symbol_contents_batch() {
2414        let db = Database::open_memory().unwrap();
2415        let sym1 = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2416        let sym2 = test_symbol("bar", SymbolKind::Function, "a.py", 10);
2417        db.insert_symbols(&[sym1.clone(), sym2.clone()]).unwrap();
2418
2419        let items = vec![
2420            (
2421                sym1.id.clone(),
2422                "foo".to_string(),
2423                "def foo(): pass".to_string(),
2424                "header1".to_string(),
2425            ),
2426            (
2427                sym2.id.clone(),
2428                "bar".to_string(),
2429                "def bar(): pass".to_string(),
2430                "header2".to_string(),
2431            ),
2432        ];
2433        db.insert_symbol_contents(&items).unwrap();
2434
2435        assert_eq!(db.symbol_content_count().unwrap(), 2);
2436        assert!(db.get_symbol_content(&sym1.id).unwrap().is_some());
2437        assert!(db.get_symbol_content(&sym2.id).unwrap().is_some());
2438    }
2439
2440    #[test]
2441    fn test_clear_symbol_content_for_file() {
2442        let db = Database::open_memory().unwrap();
2443        let sym1 = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2444        let sym2 = test_symbol("bar", SymbolKind::Function, "b.py", 1);
2445        db.insert_symbols(&[sym1.clone(), sym2.clone()]).unwrap();
2446
2447        db.upsert_symbol_content(&sym1.id, "foo", "content1", "header1")
2448            .unwrap();
2449        db.upsert_symbol_content(&sym2.id, "bar", "content2", "header2")
2450            .unwrap();
2451        assert_eq!(db.symbol_content_count().unwrap(), 2);
2452
2453        db.clear_symbol_content_for_file("a.py").unwrap();
2454        assert_eq!(db.symbol_content_count().unwrap(), 1);
2455        assert!(db.get_symbol_content(&sym1.id).unwrap().is_none());
2456        assert!(db.get_symbol_content(&sym2.id).unwrap().is_some());
2457    }
2458
2459    // ── RAG: FTS5 Tests ──
2460
2461    #[test]
2462    fn test_fts5_search_by_content() {
2463        let db = Database::open_memory().unwrap();
2464        let sym = test_symbol("validate_token", SymbolKind::Function, "auth.py", 1);
2465        db.insert_symbol(&sym).unwrap();
2466
2467        db.upsert_symbol_content(
2468            &sym.id,
2469            "validate_token",
2470            "def validate_token(token: str) -> bool:\n    return token.is_valid()",
2471            "// File: auth.py",
2472        )
2473        .unwrap();
2474
2475        // Search by content keyword
2476        let results = db.fts5_search("\"validate\"", 10).unwrap();
2477        assert!(!results.is_empty());
2478        assert_eq!(results[0], sym.id);
2479    }
2480
2481    #[test]
2482    fn test_fts5_search_no_match() {
2483        let db = Database::open_memory().unwrap();
2484        let sym = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2485        db.insert_symbol(&sym).unwrap();
2486        db.upsert_symbol_content(&sym.id, "foo", "def foo(): pass", "header")
2487            .unwrap();
2488
2489        let results = db.fts5_search("\"nonexistent_term_xyz\"", 10).unwrap();
2490        assert!(results.is_empty());
2491    }
2492
2493    #[test]
2494    fn fts5_drops_old_content_when_symbol_content_is_replaced() {
2495        // Re-indexing a symbol (INSERT OR REPLACE on symbol_content) must not
2496        // leave the previous content searchable. Without an explicit delete the
2497        // FTS5 external-content delete trigger does not fire on REPLACE-conflict
2498        // (recursive_triggers is off), so a stale secret stays searchable.
2499        let db = Database::open_memory().unwrap();
2500        let sym = test_symbol("load", SymbolKind::Function, "a.py", 1);
2501        db.insert_symbol(&sym).unwrap();
2502
2503        db.upsert_symbol_content(&sym.id, "load", "key = ghp_oldsecrettoken_value", "h")
2504            .unwrap();
2505        assert!(!db
2506            .fts5_search("\"ghp_oldsecrettoken_value\"", 10)
2507            .unwrap()
2508            .is_empty());
2509
2510        db.upsert_symbol_content(&sym.id, "load", "key = [REDACTED_SECRET]", "h")
2511            .unwrap();
2512
2513        // Assert against the raw FTS index, not the JOIN-filtered fts5_search:
2514        // an orphaned FTS row survives the JOIN filter but still leaks the
2515        // plaintext token at the index level.
2516        let stale: i64 = db
2517            .conn
2518            .query_row(
2519                "SELECT count(*) FROM symbol_fts WHERE symbol_fts MATCH 'ghp_oldsecrettoken_value'",
2520                [],
2521                |r| r.get(0),
2522            )
2523            .unwrap();
2524        assert_eq!(stale, 0, "old plaintext must not remain in the FTS index");
2525        assert_eq!(db.symbol_content_count().unwrap(), 1);
2526    }
2527
2528    // ── RAG: Embedding Map Tests ──
2529
2530    #[test]
2531    fn test_get_or_create_embedding_id() {
2532        let db = Database::open_memory().unwrap();
2533
2534        let id1 = db.get_or_create_embedding_id("a.py:foo:1").unwrap();
2535        let id2 = db.get_or_create_embedding_id("a.py:foo:1").unwrap();
2536        let id3 = db.get_or_create_embedding_id("b.py:bar:5").unwrap();
2537
2538        assert_eq!(id1, id2, "same symbol should return same ID");
2539        assert_ne!(id1, id3, "different symbols should get different IDs");
2540    }
2541
2542    #[test]
2543    fn test_symbol_id_for_embedding() {
2544        let db = Database::open_memory().unwrap();
2545        let eid = db.get_or_create_embedding_id("test:sym:1").unwrap();
2546
2547        let sym_id = db.symbol_id_for_embedding(eid).unwrap();
2548        assert_eq!(sym_id, Some("test:sym:1".to_string()));
2549
2550        let none = db.symbol_id_for_embedding(99999).unwrap();
2551        assert!(none.is_none());
2552    }
2553
2554    #[test]
2555    fn test_symbol_ids_for_embeddings_batch() {
2556        let db = Database::open_memory().unwrap();
2557        let eid1 = db.get_or_create_embedding_id("a:foo:1").unwrap();
2558        let eid2 = db.get_or_create_embedding_id("b:bar:2").unwrap();
2559
2560        let results = db.symbol_ids_for_embeddings(&[eid1, eid2]).unwrap();
2561        assert_eq!(results.len(), 2);
2562    }
2563
2564    // ── RAG: Vector Storage Tests ──
2565
2566    #[test]
2567    fn test_upsert_and_search_embedding() {
2568        let db = Database::open_memory().unwrap();
2569        let eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
2570
2571        // Create a simple 384-dim vector
2572        let mut embedding = vec![0.0f32; 384];
2573        embedding[0] = 1.0;
2574        let bytes: Vec<u8> = embedding.iter().flat_map(|f| f.to_le_bytes()).collect();
2575
2576        db.upsert_embedding(eid, &bytes).unwrap();
2577
2578        // Search with a similar vector
2579        let query = bytes.clone();
2580        let results = db.vector_search(&query, 5).unwrap();
2581
2582        assert_eq!(results.len(), 1);
2583        assert_eq!(results[0].0, eid);
2584        assert!(
2585            results[0].1 < 0.01,
2586            "self-match should have near-zero distance"
2587        );
2588    }
2589
2590    #[test]
2591    fn test_insert_embeddings_batch() {
2592        let db = Database::open_memory().unwrap();
2593        let eid1 = db.get_or_create_embedding_id("a:foo:1").unwrap();
2594        let eid2 = db.get_or_create_embedding_id("b:bar:2").unwrap();
2595
2596        let make_vec = |val: f32| -> Vec<u8> {
2597            let v = vec![val; 384];
2598            v.iter().flat_map(|f| f.to_le_bytes()).collect()
2599        };
2600
2601        let items = vec![(eid1, make_vec(0.1)), (eid2, make_vec(0.9))];
2602        db.insert_embeddings(&items).unwrap();
2603
2604        assert_eq!(db.embedding_count().unwrap(), 2);
2605    }
2606
2607    #[test]
2608    fn test_has_embedding() {
2609        let db = Database::open_memory().unwrap();
2610        assert!(!db.has_embedding("nonexistent").unwrap());
2611
2612        let eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
2613        // Map exists but no vector yet
2614        assert!(!db.has_embedding("a:foo:1").unwrap());
2615
2616        // Insert vector
2617        let bytes: Vec<u8> = vec![0.0f32; 384]
2618            .iter()
2619            .flat_map(|f| f.to_le_bytes())
2620            .collect();
2621        db.upsert_embedding(eid, &bytes).unwrap();
2622        assert!(db.has_embedding("a:foo:1").unwrap());
2623    }
2624
2625    #[test]
2626    fn test_clear_all_embeddings() {
2627        let db = Database::open_memory().unwrap();
2628        let eid1 = db.get_or_create_embedding_id("a:foo:1").unwrap();
2629        let eid2 = db.get_or_create_embedding_id("b:bar:2").unwrap();
2630
2631        let bytes: Vec<u8> = vec![0.0f32; 384]
2632            .iter()
2633            .flat_map(|f| f.to_le_bytes())
2634            .collect();
2635        db.upsert_embedding(eid1, &bytes).unwrap();
2636        db.upsert_embedding(eid2, &bytes).unwrap();
2637        assert_eq!(db.embedding_count().unwrap(), 2);
2638
2639        db.clear_all_embeddings().unwrap();
2640        assert_eq!(db.embedding_count().unwrap(), 0);
2641    }
2642
2643    #[test]
2644    fn embedding_count_excludes_orphan_map_rows() {
2645        let db = Database::open_memory().unwrap();
2646        // Orphan map row (no vector yet) must not count as a usable embedding.
2647        let _eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
2648        assert_eq!(db.embedding_count().unwrap(), 0);
2649
2650        // Once a vector is written it counts.
2651        let eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
2652        let bytes: Vec<u8> = vec![0.0f32; 384]
2653            .iter()
2654            .flat_map(|f| f.to_le_bytes())
2655            .collect();
2656        db.upsert_embedding(eid, &bytes).unwrap();
2657        assert_eq!(db.embedding_count().unwrap(), 1);
2658    }
2659
2660    #[test]
2661    fn test_symbols_needing_embeddings() {
2662        let db = Database::open_memory().unwrap();
2663        let sym1 = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2664        let sym2 = test_symbol("bar", SymbolKind::Function, "a.py", 10);
2665        db.insert_symbols(&[sym1.clone(), sym2.clone()]).unwrap();
2666
2667        // Add content for both
2668        db.upsert_symbol_content(&sym1.id, "foo", "def foo(): pass", "header")
2669            .unwrap();
2670        db.upsert_symbol_content(&sym2.id, "bar", "def bar(): pass", "header")
2671            .unwrap();
2672
2673        // Both need embeddings initially
2674        let needing = db.symbols_needing_embeddings().unwrap();
2675        assert_eq!(needing.len(), 2);
2676
2677        // Embed one
2678        let eid = db.get_or_create_embedding_id(&sym1.id).unwrap();
2679        let bytes: Vec<u8> = vec![0.0f32; 384]
2680            .iter()
2681            .flat_map(|f| f.to_le_bytes())
2682            .collect();
2683        db.upsert_embedding(eid, &bytes).unwrap();
2684
2685        // Only one needs embedding now
2686        let needing = db.symbols_needing_embeddings().unwrap();
2687        assert_eq!(needing.len(), 1);
2688        assert_eq!(needing[0], sym2.id);
2689    }
2690
2691    #[test]
2692    fn test_clear_rag_data_for_file() {
2693        let db = Database::open_memory().unwrap();
2694        let sym1 = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2695        let sym2 = test_symbol("bar", SymbolKind::Function, "b.py", 1);
2696        db.insert_symbols(&[sym1.clone(), sym2.clone()]).unwrap();
2697
2698        db.upsert_symbol_content(&sym1.id, "foo", "content1", "header1")
2699            .unwrap();
2700        db.upsert_symbol_content(&sym2.id, "bar", "content2", "header2")
2701            .unwrap();
2702
2703        let eid1 = db.get_or_create_embedding_id(&sym1.id).unwrap();
2704        let eid2 = db.get_or_create_embedding_id(&sym2.id).unwrap();
2705        let bytes: Vec<u8> = vec![0.0f32; 384]
2706            .iter()
2707            .flat_map(|f| f.to_le_bytes())
2708            .collect();
2709        db.upsert_embedding(eid1, &bytes).unwrap();
2710        db.upsert_embedding(eid2, &bytes).unwrap();
2711
2712        // Clear RAG data for a.py only
2713        db.clear_rag_data_for_file("a.py").unwrap();
2714
2715        // a.py data gone
2716        assert!(db.get_symbol_content(&sym1.id).unwrap().is_none());
2717        assert!(!db.has_embedding(&sym1.id).unwrap());
2718
2719        // b.py data intact
2720        assert!(db.get_symbol_content(&sym2.id).unwrap().is_some());
2721        assert!(db.has_embedding(&sym2.id).unwrap());
2722    }
2723
2724    #[test]
2725    fn clear_embeddings_for_symbols_drops_only_named_ids() {
2726        let db = Database::open_memory().unwrap();
2727        let sym1 = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2728        let sym2 = test_symbol("bar", SymbolKind::Function, "a.py", 10);
2729        db.insert_symbols(&[sym1.clone(), sym2.clone()]).unwrap();
2730        db.upsert_symbol_content(&sym1.id, "foo", "def foo(): pass", "header")
2731            .unwrap();
2732        db.upsert_symbol_content(&sym2.id, "bar", "def bar(): pass", "header")
2733            .unwrap();
2734
2735        let bytes: Vec<u8> = vec![0.0f32; 384]
2736            .iter()
2737            .flat_map(|f| f.to_le_bytes())
2738            .collect();
2739        for sym in [&sym1, &sym2] {
2740            let eid = db.get_or_create_embedding_id(&sym.id).unwrap();
2741            db.upsert_embedding(eid, &bytes).unwrap();
2742        }
2743        assert_eq!(db.embedding_count().unwrap(), 2);
2744
2745        let tx = db.begin_indexing_tx().unwrap();
2746        db.clear_embeddings_for_symbols_in_tx(std::slice::from_ref(&sym1.id))
2747            .unwrap();
2748        tx.commit().unwrap();
2749
2750        // sym1's embedding gone, sym2 intact, content untouched for both.
2751        assert!(!db.has_embedding(&sym1.id).unwrap());
2752        assert!(db.has_embedding(&sym2.id).unwrap());
2753        assert!(db.get_symbol_content(&sym1.id).unwrap().is_some());
2754        // The cleared symbol is now back in the needs-embedding set.
2755        let needing = db.symbols_needing_embeddings().unwrap();
2756        assert_eq!(needing, vec![sym1.id.clone()]);
2757    }
2758
2759    #[test]
2760    fn clear_embeddings_for_symbols_is_noop_for_unembedded_id() {
2761        let db = Database::open_memory().unwrap();
2762        let sym = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2763        db.insert_symbols(std::slice::from_ref(&sym)).unwrap();
2764        db.upsert_symbol_content(&sym.id, "foo", "def foo(): pass", "header")
2765            .unwrap();
2766
2767        let tx = db.begin_indexing_tx().unwrap();
2768        db.clear_embeddings_for_symbols_in_tx(std::slice::from_ref(&sym.id))
2769            .unwrap();
2770        tx.commit().unwrap();
2771
2772        assert_eq!(db.embedding_count().unwrap(), 0);
2773    }
2774
2775    #[test]
2776    fn test_all_content_symbol_ids() {
2777        let db = Database::open_memory().unwrap();
2778        let sym1 = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2779        let sym2 = test_symbol("bar", SymbolKind::Function, "b.py", 1);
2780        db.insert_symbols(&[sym1.clone(), sym2.clone()]).unwrap();
2781
2782        db.upsert_symbol_content(&sym1.id, "foo", "content1", "header1")
2783            .unwrap();
2784        db.upsert_symbol_content(&sym2.id, "bar", "content2", "header2")
2785            .unwrap();
2786
2787        let all = db.all_content_symbol_ids().unwrap();
2788        assert_eq!(all.len(), 2);
2789    }
2790
2791    #[test]
2792    fn test_symbols_needing_embeddings_excludes_variables() {
2793        let db = Database::open_memory().unwrap();
2794        let func = test_symbol("process", SymbolKind::Function, "a.py", 1);
2795        let var = test_symbol("MAX_RETRIES", SymbolKind::Variable, "a.py", 10);
2796        let cls = test_symbol("Service", SymbolKind::Class, "a.py", 20);
2797        db.insert_symbols(&[func.clone(), var.clone(), cls.clone()])
2798            .unwrap();
2799
2800        // Add content for all three
2801        db.upsert_symbol_content(&func.id, "process", "def process(): pass", "header")
2802            .unwrap();
2803        db.upsert_symbol_content(&var.id, "MAX_RETRIES", "MAX_RETRIES = 3", "header")
2804            .unwrap();
2805        db.upsert_symbol_content(&cls.id, "Service", "class Service: pass", "header")
2806            .unwrap();
2807
2808        // Only function and class should need embeddings (variable excluded)
2809        let needing = db.symbols_needing_embeddings().unwrap();
2810        assert_eq!(needing.len(), 2);
2811        assert!(!needing.contains(&var.id), "variables should be excluded");
2812        assert!(needing.contains(&func.id));
2813        assert!(needing.contains(&cls.id));
2814    }
2815
2816    #[test]
2817    fn test_all_content_symbol_ids_excludes_variables() {
2818        let db = Database::open_memory().unwrap();
2819        let func = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2820        let var = test_symbol("MY_VAR", SymbolKind::Variable, "a.py", 10);
2821        let method = test_symbol("bar", SymbolKind::Method, "a.py", 20);
2822        db.insert_symbols(&[func.clone(), var.clone(), method.clone()])
2823            .unwrap();
2824
2825        db.upsert_symbol_content(&func.id, "foo", "def foo(): pass", "header")
2826            .unwrap();
2827        db.upsert_symbol_content(&var.id, "MY_VAR", "MY_VAR = 42", "header")
2828            .unwrap();
2829        db.upsert_symbol_content(&method.id, "bar", "def bar(self): pass", "header")
2830            .unwrap();
2831
2832        let all = db.all_content_symbol_ids().unwrap();
2833        assert_eq!(all.len(), 2, "variables should be excluded");
2834        assert!(!all.contains(&var.id));
2835    }
2836
2837    #[test]
2838    fn test_get_symbol_contents_batch() {
2839        let db = Database::open_memory().unwrap();
2840        let sym1 = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2841        let sym2 = test_symbol("bar", SymbolKind::Function, "a.py", 10);
2842        let sym3 = test_symbol("baz", SymbolKind::Function, "a.py", 20);
2843        db.insert_symbols(&[sym1.clone(), sym2.clone(), sym3.clone()])
2844            .unwrap();
2845
2846        db.upsert_symbol_content(&sym1.id, "foo", "def foo(): pass", "h1")
2847            .unwrap();
2848        db.upsert_symbol_content(&sym2.id, "bar", "def bar(): pass", "h2")
2849            .unwrap();
2850        // sym3 has no content
2851
2852        let ids = vec![sym1.id.clone(), sym2.id.clone(), sym3.id.clone()];
2853        let map = db.get_symbol_contents_batch(&ids).unwrap();
2854        assert_eq!(map.len(), 2);
2855        assert!(map.contains_key(&sym1.id));
2856        assert!(map.contains_key(&sym2.id));
2857        assert!(!map.contains_key(&sym3.id));
2858        assert_eq!(map[&sym1.id].0, "def foo(): pass");
2859    }
2860
2861    #[test]
2862    fn test_get_symbol_contents_batch_empty() {
2863        let db = Database::open_memory().unwrap();
2864        let map = db.get_symbol_contents_batch(&[]).unwrap();
2865        assert!(map.is_empty());
2866    }
2867
2868    #[test]
2869    fn test_get_symbol_by_id() {
2870        let db = Database::open_memory().unwrap();
2871        let sym = test_symbol("foo", SymbolKind::Function, "a.py", 1);
2872        db.insert_symbol(&sym).unwrap();
2873
2874        let found = db.get_symbol(&sym.id).unwrap();
2875        assert!(found.is_some());
2876        assert_eq!(found.unwrap().name, "foo");
2877
2878        let not_found = db.get_symbol("nonexistent").unwrap();
2879        assert!(not_found.is_none());
2880    }
2881
2882    #[test]
2883    fn test_symbols_for_files_basic() {
2884        let db = Database::open_memory().unwrap();
2885        let s1 = test_symbol("func_a", SymbolKind::Function, "src/a.py", 1);
2886        let s2 = test_symbol("func_b", SymbolKind::Function, "src/a.py", 10);
2887        let s3 = test_symbol("ClassC", SymbolKind::Class, "src/b.py", 1);
2888        let s4 = test_symbol("func_d", SymbolKind::Function, "src/c.py", 1);
2889        db.insert_symbols(&[s1, s2, s3, s4]).unwrap();
2890
2891        // Query for two files
2892        let files = vec!["src/a.py".to_string(), "src/b.py".to_string()];
2893        let results = db.symbols_for_files(&files, None).unwrap();
2894        assert_eq!(results.len(), 3);
2895        assert_eq!(results[0].file_path, "src/a.py");
2896        assert_eq!(results[2].file_path, "src/b.py");
2897    }
2898
2899    #[test]
2900    fn test_symbols_for_files_kind_filter() {
2901        let db = Database::open_memory().unwrap();
2902        let s1 = test_symbol("func_a", SymbolKind::Function, "src/a.py", 1);
2903        let s2 = test_symbol("ClassB", SymbolKind::Class, "src/a.py", 10);
2904        db.insert_symbols(&[s1, s2]).unwrap();
2905
2906        let files = vec!["src/a.py".to_string()];
2907        let results = db
2908            .symbols_for_files(&files, Some(SymbolKind::Function))
2909            .unwrap();
2910        assert_eq!(results.len(), 1);
2911        assert_eq!(results[0].name, "func_a");
2912    }
2913
2914    #[test]
2915    fn test_symbols_for_files_empty_input() {
2916        let db = Database::open_memory().unwrap();
2917        let results = db.symbols_for_files(&[], None).unwrap();
2918        assert!(results.is_empty());
2919    }
2920
2921    #[test]
2922    fn test_symbols_for_files_no_matching_files() {
2923        let db = Database::open_memory().unwrap();
2924        let s1 = test_symbol("func_a", SymbolKind::Function, "src/a.py", 1);
2925        db.insert_symbol(&s1).unwrap();
2926
2927        let files = vec!["src/nonexistent.py".to_string()];
2928        let results = db.symbols_for_files(&files, None).unwrap();
2929        assert!(results.is_empty());
2930    }
2931
2932    // ── In-degree centrality tests ──
2933
2934    #[test]
2935    fn test_compute_in_degrees() {
2936        let db = Database::open_memory().unwrap();
2937        let s1 = test_symbol("func_a", SymbolKind::Function, "a.py", 1);
2938        let s2 = test_symbol("func_b", SymbolKind::Function, "b.py", 1);
2939        let s3 = test_symbol("func_c", SymbolKind::Function, "c.py", 1);
2940        db.insert_symbols(&[s1.clone(), s2.clone(), s3.clone()])
2941            .unwrap();
2942
2943        // func_b calls func_a (2 call sites), func_c calls func_a (1 call site)
2944        let e1 = Edge::new(&s2.id, "func_a", EdgeKind::Calls, "b.py", 5);
2945        let e2 = Edge::new(&s2.id, "func_a", EdgeKind::Calls, "b.py", 10);
2946        let e3 = Edge::new(&s3.id, "func_a", EdgeKind::Calls, "c.py", 3);
2947        // func_c also calls func_b
2948        let e4 = Edge::new(&s3.id, "func_b", EdgeKind::Calls, "c.py", 7);
2949        db.insert_edges(&[e1, e2, e3, e4]).unwrap();
2950        db.resolve_edges().unwrap();
2951        db.compute_in_degrees().unwrap();
2952
2953        let sym_a = db.get_symbol(&s1.id).unwrap().unwrap();
2954        let sym_b = db.get_symbol(&s2.id).unwrap().unwrap();
2955        let sym_c = db.get_symbol(&s3.id).unwrap().unwrap();
2956
2957        assert_eq!(sym_a.in_degree, 3, "func_a should have 3 incoming edges");
2958        assert_eq!(sym_b.in_degree, 1, "func_b should have 1 incoming edge");
2959        assert_eq!(sym_c.in_degree, 0, "func_c should have 0 incoming edges");
2960    }
2961
2962    #[test]
2963    fn test_compute_in_degrees_resets() {
2964        let db = Database::open_memory().unwrap();
2965        let s1 = test_symbol("func_a", SymbolKind::Function, "a.py", 1);
2966        db.insert_symbol(&s1).unwrap();
2967
2968        // Manually set in_degree to 99
2969        db.conn
2970            .execute(
2971                "UPDATE symbols SET in_degree = 99 WHERE id = ?1",
2972                params![s1.id],
2973            )
2974            .unwrap();
2975
2976        // compute_in_degrees should reset to 0 (no edges)
2977        db.compute_in_degrees().unwrap();
2978        let sym = db.get_symbol(&s1.id).unwrap().unwrap();
2979        assert_eq!(sym.in_degree, 0);
2980    }
2981
2982    #[test]
2983    fn test_top_symbols_ordered_by_centrality() {
2984        let db = Database::open_memory().unwrap();
2985        let s1 = test_symbol("hub", SymbolKind::Function, "a.py", 1);
2986        let s2 = test_symbol("leaf", SymbolKind::Function, "b.py", 1);
2987        let s3 = test_symbol("mid", SymbolKind::Function, "c.py", 1);
2988        db.insert_symbols(&[s1.clone(), s2.clone(), s3.clone()])
2989            .unwrap();
2990
2991        // Set in-degrees directly for testing
2992        db.conn
2993            .execute(
2994                "UPDATE symbols SET in_degree = 10 WHERE id = ?1",
2995                params![s1.id],
2996            )
2997            .unwrap();
2998        db.conn
2999            .execute(
3000                "UPDATE symbols SET in_degree = 1 WHERE id = ?1",
3001                params![s2.id],
3002            )
3003            .unwrap();
3004        db.conn
3005            .execute(
3006                "UPDATE symbols SET in_degree = 5 WHERE id = ?1",
3007                params![s3.id],
3008            )
3009            .unwrap();
3010
3011        let top = db.top_symbols(10).unwrap();
3012        assert_eq!(top.len(), 3);
3013        assert_eq!(top[0].name, "hub");
3014        assert_eq!(top[0].in_degree, 10);
3015        assert_eq!(top[1].name, "mid");
3016        assert_eq!(top[2].name, "leaf");
3017    }
3018
3019    #[test]
3020    fn test_search_uses_in_degree_tiebreaker() {
3021        let db = Database::open_memory().unwrap();
3022        // Two functions with same name prefix, different centrality
3023        let s1 = test_symbol("parse_request", SymbolKind::Function, "a.py", 1);
3024        let s2 = test_symbol("parse_response", SymbolKind::Function, "b.py", 1);
3025        db.insert_symbols(&[s1.clone(), s2.clone()]).unwrap();
3026
3027        db.conn
3028            .execute(
3029                "UPDATE symbols SET in_degree = 20 WHERE id = ?1",
3030                params![s1.id],
3031            )
3032            .unwrap();
3033        db.conn
3034            .execute(
3035                "UPDATE symbols SET in_degree = 5 WHERE id = ?1",
3036                params![s2.id],
3037            )
3038            .unwrap();
3039
3040        let results = db.search("parse", None, None, 10).unwrap();
3041        assert_eq!(results.len(), 2);
3042        // parse_request (in_degree=20) should come before parse_response (in_degree=5)
3043        assert_eq!(results[0].name, "parse_request");
3044        assert_eq!(results[1].name, "parse_response");
3045    }
3046
3047    #[test]
3048    fn test_schema_version_stored() {
3049        let db = Database::open_memory().unwrap();
3050        let version = db.get_metadata("schema_version").unwrap();
3051        assert!(version.is_some());
3052        assert_eq!(version.unwrap(), SCHEMA_VERSION.to_string());
3053    }
3054
3055    // ── Scoped edge resolution tests ──
3056
3057    #[test]
3058    fn test_invalidate_dangling_edges_after_symbol_removal() {
3059        let db = Database::open_memory().unwrap();
3060
3061        // File A: defines foo
3062        let sym_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
3063        db.insert_symbol(&sym_a).unwrap();
3064
3065        // File B: calls foo (edge from B to A)
3066        let sym_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
3067        db.insert_symbol(&sym_b).unwrap();
3068        let edge = Edge::new(&sym_b.id, "foo", EdgeKind::Calls, "b.py", 5);
3069        db.insert_edge(&edge).unwrap();
3070
3071        // Resolve: edge should point to sym_a
3072        let resolved = db.resolve_edges().unwrap();
3073        assert_eq!(resolved, 1);
3074
3075        // Simulate: directly delete the symbol row (bypassing delete_symbol cascade)
3076        // to create a dangling edge reference
3077        db.conn
3078            .execute("DELETE FROM symbols WHERE id = ?1", params![sym_a.id])
3079            .unwrap();
3080
3081        // Invalidate dangling edges
3082        let dirty = std::collections::HashSet::from(["a.py".to_string()]);
3083        let invalidated = db.invalidate_edges_targeting(&dirty).unwrap();
3084        assert_eq!(invalidated, 1);
3085
3086        // Edge should now be unresolved
3087        let edges = db.callees("bar").unwrap();
3088        assert!(
3089            edges.iter().all(|e| e.target_id.is_none()),
3090            "edge should be unresolved after invalidation"
3091        );
3092    }
3093
3094    #[test]
3095    fn test_scoped_resolution_after_symbol_changes() {
3096        let db = Database::open_memory().unwrap();
3097
3098        // File A: defines foo
3099        let sym_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
3100        db.insert_symbol(&sym_a).unwrap();
3101
3102        // File B: calls foo
3103        let sym_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
3104        db.insert_symbol(&sym_b).unwrap();
3105        db.insert_edge(&Edge::new(&sym_b.id, "foo", EdgeKind::Calls, "b.py", 5))
3106            .unwrap();
3107
3108        // Resolve globally first
3109        db.resolve_edges().unwrap();
3110
3111        // Simulate re-indexing a.py: delete_symbol nullifies edges, then re-insert
3112        db.delete_symbol(&sym_a.id).unwrap();
3113        db.insert_symbol(&sym_a).unwrap();
3114
3115        // Scoped resolve should re-resolve the edge
3116        let dirty = std::collections::HashSet::from(["a.py".to_string()]);
3117        let re_resolved = db.resolve_edges_scoped(&dirty).unwrap();
3118        assert_eq!(re_resolved, 1);
3119    }
3120
3121    #[test]
3122    fn test_compute_in_degrees_scoped() {
3123        let db = Database::open_memory().unwrap();
3124
3125        let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
3126        let bar = test_symbol("bar", SymbolKind::Function, "b.py", 1);
3127        let baz = test_symbol("baz", SymbolKind::Function, "c.py", 1);
3128        db.insert_symbol(&foo).unwrap();
3129        db.insert_symbol(&bar).unwrap();
3130        db.insert_symbol(&baz).unwrap();
3131
3132        // bar calls foo, baz calls foo
3133        db.insert_edge(&Edge::new(&bar.id, "foo", EdgeKind::Calls, "b.py", 5))
3134            .unwrap();
3135        db.insert_edge(&Edge::new(&baz.id, "foo", EdgeKind::Calls, "c.py", 3))
3136            .unwrap();
3137
3138        db.resolve_edges().unwrap();
3139        db.compute_in_degrees().unwrap();
3140
3141        // foo should have in_degree = 2
3142        let results = db.search("foo", None, None, 10).unwrap();
3143        assert_eq!(results[0].in_degree, 2);
3144
3145        // Now scope to just b.py
3146        let dirty = std::collections::HashSet::from(["b.py".to_string()]);
3147        db.compute_in_degrees_scoped(&dirty).unwrap();
3148
3149        // foo should still have in_degree = 2 (recomputed correctly)
3150        let results = db.search("foo", None, None, 10).unwrap();
3151        assert_eq!(results[0].in_degree, 2);
3152    }
3153
3154    #[test]
3155    fn test_tier2_import_resolution_plan_uses_kind_target_index() {
3156        // Plan regression for #109; SQL mirrors tier-2 in store/resolution.rs.
3157        let db = Database::open_memory().unwrap();
3158        let mut stmt = db
3159            .conn
3160            .prepare(
3161                "EXPLAIN QUERY PLAN SELECT s.id FROM symbols s
3162                 INNER JOIN edges ie ON ie.kind = 'imports' AND ie.target_name = ?1
3163                     AND ie.target_id IS NOT NULL
3164                 INNER JOIN symbols is2 ON is2.id = ie.source_id AND is2.file_path = ?2
3165                 INNER JOIN symbols resolved ON resolved.id = ie.target_id
3166                 WHERE s.name = ?1 AND s.kind != 'import'
3167                     AND s.file_path = resolved.file_path
3168                 LIMIT 1",
3169            )
3170            .unwrap();
3171        let plan = stmt
3172            .query_map(params!["x", "y"], |row| row.get::<_, String>(3))
3173            .unwrap()
3174            .collect::<std::result::Result<Vec<_>, _>>()
3175            .unwrap()
3176            .join("\n");
3177        assert!(
3178            plan.contains("idx_edges_kind_target"),
3179            "tier-2 must drive off edges(kind, target_name); got plan:\n{plan}"
3180        );
3181    }
3182
    /// Plan regression test for the two `refs()` query shapes (unfiltered and
    /// kind-filtered).
    ///
    /// Inserts 400 symbols and 400 `Calls` edges (every even-numbered edge gets
    /// a `target_id`), runs `ANALYZE`, then inspects `EXPLAIN QUERY PLAN`
    /// output. Each shape must report `MULTI-INDEX OR`, seek the expected edge
    /// indexes, and never full-scan `edges`. The SQL here is written out by
    /// hand; it is assumed to mirror the real `refs()` queries and must be kept
    /// in sync with them.
    #[test]
    fn test_refs_plan_uses_multi_index_or_not_full_scan() {
        // Plan regression: both refs() branches must resolve via a MULTI-INDEX OR
        // over the edge target indexes, never the old `OR sym2.name` full scan.
        let db = Database::open_memory().unwrap();
        // Populate + ANALYZE: a zero-row DB collapses every plan to a kind-only
        // scan, hiding the target bound. Selective target_names + half-resolved
        // target_ids make both MULTI-INDEX OR arms the cheapest plan.
        let syms: Vec<Symbol> = (0..400)
            .map(|i| test_symbol(&format!("s{i}"), SymbolKind::Function, "a.py", i))
            .collect();
        db.insert_symbols(&syms).unwrap();
        let edges: Vec<Edge> = (0..400)
            .map(|i| {
                let mut e = Edge::new(
                    &syms[i as usize].id,
                    format!("t{i}"),
                    EdgeKind::Calls,
                    "a.py",
                    i,
                );
                // Resolve only the even-numbered edges (each one to its own
                // source symbol) so both target indexes hold selective data.
                if i % 2 == 0 {
                    e.target_id = Some(syms[i as usize].id.clone());
                }
                e
            })
            .collect();
        db.insert_edges(&edges).unwrap();
        db.conn.execute_batch("ANALYZE;").unwrap();

        // Prepares `sql`, binds "x" as its single parameter, and joins column 3
        // of every returned row (the EXPLAIN QUERY PLAN detail text) with
        // newlines.
        let explain = |sql: &str| -> String {
            let mut stmt = db.conn.prepare(sql).unwrap();
            stmt.query_map(params!["x"], |row| row.get::<_, String>(3))
                .unwrap()
                .collect::<std::result::Result<Vec<_>, _>>()
                .unwrap()
                .join("\n")
        };
        // Fails the test when `plan` contains a bare scan of `edges`, either
        // under its alias `e` (mid-plan or as the last line) or by table name.
        let assert_no_edge_scan = |plan: &str, ctx: &str| {
            // Core invariant: edges reached by an index, never the old OR-join scan.
            assert!(
                !plan.contains("SCAN e\n")
                    && !plan.ends_with("SCAN e")
                    && !plan.contains("SCAN edges"),
                "refs() {ctx} must not full-scan edges; got plan:\n{plan}"
            );
        };

        // Unfiltered branch: MULTI-INDEX OR. Assert the full EQP detail (trailing
        // `(target_name=` / `(target_id=`) so `idx_edges_target` isn't subsumed
        // by the `idx_edges_target_id` prefix.
        let unfiltered = explain(
            "EXPLAIN QUERY PLAN
             SELECT e.id FROM edges e
             LEFT JOIN symbols s ON e.source_id = s.id
             WHERE e.target_name = ?1
                OR e.target_id IN (SELECT id FROM symbols WHERE name = ?1)",
        );
        assert!(
            unfiltered.contains("MULTI-INDEX OR"),
            "refs() unfiltered must use a multi-index OR; got plan:\n{unfiltered}"
        );
        assert!(
            unfiltered.contains("idx_edges_target (target_name="),
            "refs() literal arm must seek idx_edges_target on target_name; got plan:\n{unfiltered}"
        );
        assert!(
            unfiltered.contains("idx_edges_target_id (target_id="),
            "refs() resolved arm must seek idx_edges_target_id on target_id; got plan:\n{unfiltered}"
        );
        assert_no_edge_scan(&unfiltered, "unfiltered");

        // Kind-filtered branch: kind pushed into each OR arm so both stay
        // target-bounded (composite idx_edges_kind_target + idx_edges_target_id).
        let kind_filtered = explain(
            "EXPLAIN QUERY PLAN
             SELECT e.id FROM edges e
             LEFT JOIN symbols s ON e.source_id = s.id
             WHERE (e.target_name = ?1 AND e.kind = 'calls')
                OR (e.target_id IN (SELECT id FROM symbols WHERE name = ?1)
                    AND e.kind = 'calls')",
        );
        assert!(
            kind_filtered.contains("MULTI-INDEX OR"),
            "refs() kind-filtered must use a multi-index OR; got plan:\n{kind_filtered}"
        );
        assert!(
            kind_filtered.contains("idx_edges_kind_target (kind=? AND target_name="),
            "refs() kind-filtered literal arm must seek (kind, target_name); got plan:\n{kind_filtered}"
        );
        assert!(
            kind_filtered.contains("idx_edges_target_id (target_id="),
            "refs() kind-filtered resolved arm must seek target_id; got plan:\n{kind_filtered}"
        );
        assert_no_edge_scan(&kind_filtered, "kind-filtered");
    }
3279
    /// Plan regression test for the recursive CTE behind `impact()`.
    ///
    /// Prepares an `EXPLAIN QUERY PLAN` of a three-arm recursive query on an
    /// empty in-memory database and checks the plan text: both edge target
    /// indexes must be sought, no `CORRELATED` subquery may appear, and `edges`
    /// must never be full-scanned. The SQL is written out by hand and is
    /// assumed to mirror `impact()`; keep the two in sync.
    #[test]
    fn test_impact_recursive_step_avoids_full_edge_scan() {
        // Plan regression: the impact() recursive step must reach edges through
        // indexes, never a full SCAN + correlated subquery. The old
        // `JOIN edges e ON (e.target_name = i.source_name OR EXISTS(...))` form
        // scanned all edges per frontier row (~310ms at d2 on a real repo);
        // splitting the OR into two recursive arms keeps each on an index seek
        // (idx_edges_target and idx_edges_target_id). SQL mirrors impact().
        let db = Database::open_memory().unwrap();
        let mut stmt = db
            .conn
            .prepare(
                "EXPLAIN QUERY PLAN
                 WITH RECURSIVE impacted(edge_id, source_id, target_name, target_id,
                     kind, file_path, line, resolution_source, source_name, depth) AS (
                     SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
                            e.file_path, e.line, e.resolution_source, s.name, 1
                     FROM edges e LEFT JOIN symbols s ON e.source_id = s.id
                     WHERE e.target_name = ?1
                        OR e.target_id IN (SELECT id FROM symbols WHERE name = ?1)
                     UNION
                     SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
                            e.file_path, e.line, e.resolution_source, s.name, i.depth + 1
                     FROM impacted i
                     JOIN edges e ON e.target_name = i.source_name
                     LEFT JOIN symbols s ON e.source_id = s.id
                     WHERE i.source_name IS NOT NULL AND i.depth < ?2
                     UNION
                     SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
                            e.file_path, e.line, e.resolution_source, s.name, i.depth + 1
                     FROM impacted i
                     JOIN symbols t ON t.name = i.source_name
                     JOIN edges e ON e.target_id = t.id
                     LEFT JOIN symbols s ON e.source_id = s.id
                     WHERE i.source_name IS NOT NULL AND i.depth < ?2)
                 SELECT source_id, MIN(depth) FROM impacted GROUP BY edge_id
                 ORDER BY depth, edge_id",
            )
            .unwrap();
        // Bind ?1 = "x" (symbol name) and ?2 = 3 (depth bound), then join
        // column 3 of every row (the EXPLAIN QUERY PLAN detail text) with
        // newlines.
        let plan = stmt
            .query_map(params!["x", 3], |row| row.get::<_, String>(3))
            .unwrap()
            .collect::<std::result::Result<Vec<_>, _>>()
            .unwrap()
            .join("\n");
        // Assert on the full EQP detail (with the trailing `(target_name=?)` /
        // `(target_id=?)`): a bare `contains("idx_edges_target")` is subsumed by
        // `idx_edges_target_id` (prefix), so it would pass even if the literal
        // arm regressed to a scan. The literal arm must seek idx_edges_target on
        // target_name; the resolved arm must seek idx_edges_target_id.
        assert!(
            plan.contains("idx_edges_target (target_name="),
            "impact() literal arm must seek idx_edges_target on target_name; got plan:\n{plan}"
        );
        assert!(
            plan.contains("idx_edges_target_id (target_id="),
            "impact() resolved arm must seek idx_edges_target_id on target_id; got plan:\n{plan}"
        );
        assert!(
            !plan.contains("CORRELATED"),
            "impact() must not run a correlated subquery per edge; got plan:\n{plan}"
        );
        // Direct anti-scan guard (mirrors the refs() plan test): neither
        // recursive arm may full-scan edges. `SCAN i` over the small frontier
        // is fine; a `SCAN e`/`SCAN edges` is the regression.
        assert!(
            !plan.contains("SCAN e\n") && !plan.ends_with("SCAN e") && !plan.contains("SCAN edges"),
            "impact() must not full-scan edges; got plan:\n{plan}"
        );
    }
3350
3351    #[test]
3352    fn test_per_file_edge_delete_uses_file_index() {
3353        // Plan regression: clear_file_data_in_tx's DELETE FROM edges WHERE
3354        // file_path=? must use an index, not full-scan. A scan makes
3355        // --force/first-index O(files×edges) (the per-file-clear quadratic).
3356        let db = Database::open_memory().unwrap();
3357        let mut stmt = db
3358            .conn
3359            .prepare("EXPLAIN QUERY PLAN DELETE FROM edges WHERE file_path = ?1")
3360            .unwrap();
3361        let plan = stmt
3362            .query_map(params!["a.py"], |row| row.get::<_, String>(3))
3363            .unwrap()
3364            .collect::<std::result::Result<Vec<_>, _>>()
3365            .unwrap()
3366            .join("\n");
3367        assert!(
3368            plan.contains("idx_edges_file"),
3369            "per-file edge delete must drive off edges(file_path); got plan:\n{plan}"
3370        );
3371    }
3372
3373    #[test]
3374    fn test_compute_in_degrees_plan_has_no_correlated_subquery() {
3375        // Plan regression: the in-degree UPDATE must materialize counts once and
3376        // join by PK, not re-scan it per row (correlated subquery → O(symbols×edges)).
3377        let db = Database::open_memory().unwrap();
3378        let mut stmt = db
3379            .conn
3380            .prepare(
3381                "EXPLAIN QUERY PLAN
3382                 UPDATE symbols SET in_degree = counts.cnt
3383                 FROM (
3384                     SELECT target_id, COUNT(*) AS cnt
3385                     FROM edges WHERE target_id IS NOT NULL
3386                     GROUP BY target_id
3387                 ) AS counts
3388                 WHERE symbols.id = counts.target_id",
3389            )
3390            .unwrap();
3391        let plan = stmt
3392            .query_map([], |row| row.get::<_, String>(3))
3393            .unwrap()
3394            .collect::<std::result::Result<Vec<_>, _>>()
3395            .unwrap()
3396            .join("\n");
3397        assert!(
3398            !plan.to_uppercase().contains("CORRELATED"),
3399            "in-degree UPDATE must not use a correlated subquery; got plan:\n{plan}"
3400        );
3401    }
3402
3403    #[test]
3404    fn test_compute_in_degrees_scoped_resets_target_that_lost_edge() {
3405        let db = Database::open_memory().unwrap();
3406
3407        let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
3408        let bar = test_symbol("bar", SymbolKind::Function, "b.py", 1);
3409        let baz = test_symbol("baz", SymbolKind::Function, "c.py", 1);
3410        db.insert_symbol(&foo).unwrap();
3411        db.insert_symbol(&bar).unwrap();
3412        db.insert_symbol(&baz).unwrap();
3413
3414        // bar calls foo, baz calls foo
3415        db.insert_edge(&Edge::new(&bar.id, "foo", EdgeKind::Calls, "b.py", 5))
3416            .unwrap();
3417        db.insert_edge(&Edge::new(&baz.id, "foo", EdgeKind::Calls, "c.py", 3))
3418            .unwrap();
3419
3420        db.resolve_edges().unwrap();
3421        db.compute_in_degrees().unwrap();
3422        let results = db.search("foo", None, None, 10).unwrap();
3423        assert_eq!(results[0].in_degree, 2);
3424
3425        // Re-index b.py with the call removed: the indexer clears the file's
3426        // old edges before the scoped recompute, so foo (unchanged a.py) has
3427        // already lost an incoming edge by the time the recompute runs.
3428        db.clear_edges_for_file("b.py").unwrap();
3429        let dirty = std::collections::HashSet::from(["b.py".to_string()]);
3430        db.invalidate_edges_targeting(&dirty).unwrap();
3431        db.resolve_edges_scoped(&dirty).unwrap();
3432        db.compute_in_degrees_scoped(&dirty).unwrap();
3433
3434        let results = db.search("foo", None, None, 10).unwrap();
3435        assert_eq!(results[0].in_degree, 1);
3436    }
3437
3438    // ── Embedding dimension migration tests ──
3439
3440    #[test]
3441    fn test_open_stores_embedding_dimension() {
3442        let dir = tempfile::TempDir::new().unwrap();
3443        let db_path = dir.path().join("test.db");
3444
3445        let db = Database::open(&db_path, 384).unwrap();
3446        let stored: String = db
3447            .get_metadata("embedding_dimension")
3448            .unwrap()
3449            .expect("dimension should be stored");
3450        assert_eq!(stored, "384");
3451    }
3452
3453    #[test]
3454    fn test_open_with_different_dimension_clears_embeddings() {
3455        let dir = tempfile::TempDir::new().unwrap();
3456        let db_path = dir.path().join("test.db");
3457
3458        // First open with 384-dim
3459        {
3460            let db = Database::open(&db_path, 384).unwrap();
3461            let sym = Symbol::new("foo", SymbolKind::Function, "a.py", 1, 10, 0, 100, None);
3462            db.insert_symbol(&sym).unwrap();
3463            db.upsert_symbol_content(&sym.id, "foo", "def foo():", "header")
3464                .unwrap();
3465            let eid = db.get_or_create_embedding_id(&sym.id).unwrap();
3466            let bytes = vec![0u8; 384 * 4];
3467            db.insert_embeddings(&[(eid, bytes)]).unwrap();
3468            assert_eq!(db.embedding_count().unwrap(), 1);
3469        }
3470
3471        // Reopen with 768-dim — should auto-wipe embeddings
3472        {
3473            let db = Database::open(&db_path, 768).unwrap();
3474            assert_eq!(db.embedding_count().unwrap(), 0);
3475            let stored: String = db
3476                .get_metadata("embedding_dimension")
3477                .unwrap()
3478                .expect("dimension should be updated");
3479            assert_eq!(stored, "768");
3480        }
3481    }
3482
3483    #[test]
3484    fn test_open_same_dimension_preserves_embeddings() {
3485        let dir = tempfile::TempDir::new().unwrap();
3486        let db_path = dir.path().join("test.db");
3487
3488        // First open
3489        {
3490            let db = Database::open(&db_path, 384).unwrap();
3491            let sym = Symbol::new("bar", SymbolKind::Function, "b.py", 1, 10, 0, 100, None);
3492            db.insert_symbol(&sym).unwrap();
3493            db.upsert_symbol_content(&sym.id, "bar", "def bar():", "header")
3494                .unwrap();
3495            let eid = db.get_or_create_embedding_id(&sym.id).unwrap();
3496            let bytes = vec![0u8; 384 * 4];
3497            db.insert_embeddings(&[(eid, bytes)]).unwrap();
3498        }
3499
3500        // Reopen with same dimension — embeddings preserved
3501        {
3502            let db = Database::open(&db_path, 384).unwrap();
3503            assert_eq!(db.embedding_count().unwrap(), 1);
3504        }
3505    }
3506
3507    #[test]
3508    fn test_default_dim_preserves_stored_non_default() {
3509        let dir = tempfile::TempDir::new().unwrap();
3510        let db_path = dir.path().join("test.db");
3511
3512        // First open with non-default dimension (e.g. Ollama auto-detected 768)
3513        {
3514            let db = Database::open(&db_path, 768).unwrap();
3515            let sym = Symbol::new("baz", SymbolKind::Function, "c.py", 1, 10, 0, 100, None);
3516            db.insert_symbol(&sym).unwrap();
3517            db.upsert_symbol_content(&sym.id, "baz", "def baz():", "header")
3518                .unwrap();
3519            let eid = db.get_or_create_embedding_id(&sym.id).unwrap();
3520            let bytes = vec![0u8; 768 * 4];
3521            db.insert_embeddings(&[(eid, bytes)]).unwrap();
3522        }
3523
3524        // Reopen with DEFAULT_EMBEDDING_DIM (384) — must preserve 768-dim embeddings
3525        {
3526            let db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
3527            assert_eq!(db.embedding_count().unwrap(), 1);
3528            let stored: i64 = db
3529                .conn
3530                .query_row(
3531                    "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'",
3532                    [],
3533                    |row| row.get(0),
3534                )
3535                .unwrap();
3536            assert_eq!(stored, 768);
3537        }
3538    }
3539
3540    #[test]
3541    fn test_explicit_non_default_dim_wipes_different_stored() {
3542        let dir = tempfile::TempDir::new().unwrap();
3543        let db_path = dir.path().join("test.db");
3544
3545        // First open with 768
3546        {
3547            let db = Database::open(&db_path, 768).unwrap();
3548            let sym = Symbol::new("qux", SymbolKind::Function, "d.py", 1, 10, 0, 100, None);
3549            db.insert_symbol(&sym).unwrap();
3550            db.upsert_symbol_content(&sym.id, "qux", "def qux():", "header")
3551                .unwrap();
3552            let eid = db.get_or_create_embedding_id(&sym.id).unwrap();
3553            let bytes = vec![0u8; 768 * 4];
3554            db.insert_embeddings(&[(eid, bytes)]).unwrap();
3555        }
3556
3557        // Reopen with explicit 1536 — this IS a real dimension change, must wipe
3558        {
3559            let db = Database::open(&db_path, 1536).unwrap();
3560            assert_eq!(db.embedding_count().unwrap(), 0);
3561        }
3562    }
3563
3564    #[test]
3565    fn test_reopen_same_dim_does_not_rewrite_metadata() {
3566        // True early-return guarantee: when stored dim already matches the
3567        // requested dim, `handle_embedding_dimension` should not touch the
3568        // metadata table. We assert this by snapshotting the row's content
3569        // before and after re-open and verifying no write occurred (rowid
3570        // would advance on INSERT OR REPLACE).
3571        let dir = tempfile::TempDir::new().unwrap();
3572        let db_path = dir.path().join("test.db");
3573
3574        let _db = Database::open(&db_path, 384).unwrap();
3575
3576        let rowid_before: i64 = {
3577            let conn = Connection::open(&db_path).unwrap();
3578            conn.query_row(
3579                "SELECT rowid FROM metadata WHERE key = 'embedding_dimension'",
3580                [],
3581                |row| row.get(0),
3582            )
3583            .unwrap()
3584        };
3585
3586        let _db = Database::open(&db_path, 384).unwrap();
3587
3588        let rowid_after: i64 = {
3589            let conn = Connection::open(&db_path).unwrap();
3590            conn.query_row(
3591                "SELECT rowid FROM metadata WHERE key = 'embedding_dimension'",
3592                [],
3593                |row| row.get(0),
3594            )
3595            .unwrap()
3596        };
3597
3598        // INSERT OR REPLACE assigns a new rowid; identity here proves we
3599        // skipped the write entirely.
3600        assert_eq!(
3601            rowid_before, rowid_after,
3602            "same-dim reopen should not rewrite the embedding_dimension row"
3603        );
3604    }
3605
3606    #[test]
3607    fn test_retry_busy_returns_on_non_busy_error() {
3608        // A non-busy error should propagate immediately, no retries.
3609        let attempts = std::cell::Cell::new(0);
3610        let result = retry_busy(|| -> std::result::Result<(), rusqlite::Error> {
3611            attempts.set(attempts.get() + 1);
3612            Err(rusqlite::Error::InvalidQuery)
3613        });
3614        assert!(matches!(result, Err(rusqlite::Error::InvalidQuery)));
3615        assert_eq!(attempts.get(), 1, "non-busy errors must not retry");
3616    }
3617
3618    #[test]
3619    fn test_retry_busy_succeeds_after_transient_busy() {
3620        // Simulate a writer that returns BUSY on the first call and Ok on the second.
3621        let attempts = std::cell::Cell::new(0);
3622        let result = retry_busy(|| -> std::result::Result<u32, rusqlite::Error> {
3623            attempts.set(attempts.get() + 1);
3624            if attempts.get() == 1 {
3625                Err(rusqlite::Error::SqliteFailure(
3626                    rusqlite::ffi::Error {
3627                        code: rusqlite::ErrorCode::DatabaseBusy,
3628                        extended_code: 5,
3629                    },
3630                    Some("database is locked".to_string()),
3631                ))
3632            } else {
3633                Ok(42)
3634            }
3635        });
3636        assert_eq!(result.unwrap(), 42);
3637        assert_eq!(attempts.get(), 2);
3638    }
3639
3640    #[test]
3641    fn test_retry_busy_exhausts_and_propagates() {
3642        // After backoff schedule is exhausted, the original BUSY error must surface.
3643        let attempts = std::cell::Cell::new(0);
3644        let result = retry_busy(|| -> std::result::Result<(), rusqlite::Error> {
3645            attempts.set(attempts.get() + 1);
3646            Err(rusqlite::Error::SqliteFailure(
3647                rusqlite::ffi::Error {
3648                    code: rusqlite::ErrorCode::DatabaseBusy,
3649                    extended_code: 5,
3650                },
3651                Some("database is locked".to_string()),
3652            ))
3653        });
3654        assert!(matches!(
3655            result,
3656            Err(rusqlite::Error::SqliteFailure(
3657                rusqlite::ffi::Error {
3658                    code: rusqlite::ErrorCode::DatabaseBusy,
3659                    ..
3660                },
3661                _
3662            ))
3663        ));
3664        // 1 initial call + MIGRATION_RETRY_BACKOFF_MS.len() retries
3665        assert_eq!(attempts.get(), MIGRATION_RETRY_BACKOFF_MS.len() + 1);
3666    }
3667
3668    // ── Embedding fingerprint tests (Phase 6b) ──
3669
3670    fn fp(provider: &str, model: &str, dim: usize) -> EmbeddingFingerprint {
3671        EmbeddingFingerprint {
3672            provider: provider.to_string(),
3673            model: model.to_string(),
3674            dimension: dim,
3675        }
3676    }
3677
3678    fn seed_embedding(db: &Database, dim: usize, sym_name: &str) {
3679        let sym = Symbol::new(sym_name, SymbolKind::Function, "f.py", 1, 10, 0, 100, None);
3680        db.insert_symbol(&sym).unwrap();
3681        db.upsert_symbol_content(&sym.id, sym_name, "def f():", "header")
3682            .unwrap();
3683        let eid = db.get_or_create_embedding_id(&sym.id).unwrap();
3684        let bytes = vec![0u8; dim * 4];
3685        db.insert_embeddings(&[(eid, bytes)]).unwrap();
3686    }
3687
3688    #[test]
3689    fn test_fingerprint_match_is_noop() {
3690        let dir = tempfile::TempDir::new().unwrap();
3691        let db_path = dir.path().join("test.db");
3692        let db = Database::open(&db_path, 384).unwrap();
3693        let f = fp("local", "BGE-small-en-v1.5", 384);
3694        db.reconcile_embedding_fingerprint(&f).unwrap();
3695        seed_embedding(&db, 384, "foo");
3696        // Reconciling identical fingerprint must preserve embeddings.
3697        db.reconcile_embedding_fingerprint(&f).unwrap();
3698        assert_eq!(db.embedding_count().unwrap(), 1);
3699    }
3700
3701    #[test]
3702    fn test_fingerprint_provider_swap_wipes() {
3703        let dir = tempfile::TempDir::new().unwrap();
3704        let db_path = dir.path().join("test.db");
3705        let db = Database::open(&db_path, 384).unwrap();
3706        let f1 = fp("local", "BGE-small-en-v1.5", 384);
3707        db.reconcile_embedding_fingerprint(&f1).unwrap();
3708        seed_embedding(&db, 384, "bar");
3709        assert_eq!(db.embedding_count().unwrap(), 1);
3710
3711        // Same dim + model name, different provider class → wipe.
3712        let f2 = fp("ollama", "BGE-small-en-v1.5", 384);
3713        db.reconcile_embedding_fingerprint(&f2).unwrap();
3714        assert_eq!(db.embedding_count().unwrap(), 0);
3715        assert_eq!(
3716            db.get_metadata("embedding_provider").unwrap().as_deref(),
3717            Some("ollama")
3718        );
3719    }
3720
3721    #[test]
3722    fn test_fingerprint_model_swap_wipes() {
3723        let dir = tempfile::TempDir::new().unwrap();
3724        let db_path = dir.path().join("test.db");
3725        let db = Database::open(&db_path, 384).unwrap();
3726        let f1 = fp("local", "BGE-small-en-v1.5", 384);
3727        db.reconcile_embedding_fingerprint(&f1).unwrap();
3728        seed_embedding(&db, 384, "baz");
3729        assert_eq!(db.embedding_count().unwrap(), 1);
3730
3731        // Same provider + dim, different model → still a swap, must wipe.
3732        let f2 = fp("local", "AllMiniLML6V2", 384);
3733        db.reconcile_embedding_fingerprint(&f2).unwrap();
3734        assert_eq!(db.embedding_count().unwrap(), 0);
3735        assert_eq!(
3736            db.get_metadata("embedding_model").unwrap().as_deref(),
3737            Some("AllMiniLML6V2")
3738        );
3739    }
3740
3741    #[test]
3742    fn test_fingerprint_backfill_does_not_wipe() {
3743        // Simulate an older cartog DB: dimension recorded, provider/model not yet.
3744        let dir = tempfile::TempDir::new().unwrap();
3745        let db_path = dir.path().join("test.db");
3746        let db = Database::open(&db_path, 384).unwrap();
3747        seed_embedding(&db, 384, "qux");
3748        assert!(db.get_metadata("embedding_provider").unwrap().is_none());
3749        assert_eq!(db.embedding_count().unwrap(), 1);
3750
3751        // First reconcile after upgrade: backfill provider/model without wiping.
3752        let f = fp("local", "BGE-small-en-v1.5", 384);
3753        db.reconcile_embedding_fingerprint(&f).unwrap();
3754        assert_eq!(
3755            db.embedding_count().unwrap(),
3756            1,
3757            "backfill must preserve existing embeddings"
3758        );
3759        assert_eq!(
3760            db.get_metadata("embedding_provider").unwrap().as_deref(),
3761            Some("local")
3762        );
3763        assert_eq!(
3764            db.get_metadata("embedding_model").unwrap().as_deref(),
3765            Some("BGE-small-en-v1.5")
3766        );
3767    }
3768
3769    #[test]
3770    fn test_fingerprint_dim_change_wipes() {
3771        // A real dimension change must wipe even if provider/model also change.
3772        let dir = tempfile::TempDir::new().unwrap();
3773        let db_path = dir.path().join("test.db");
3774        let db = Database::open(&db_path, 384).unwrap();
3775        let f1 = fp("local", "BGE-small-en-v1.5", 384);
3776        db.reconcile_embedding_fingerprint(&f1).unwrap();
3777        seed_embedding(&db, 384, "quux");
3778        assert_eq!(db.embedding_count().unwrap(), 1);
3779
3780        let f2 = fp("local", "BGELargeENV15", 1024);
3781        db.reconcile_embedding_fingerprint(&f2).unwrap();
3782        assert_eq!(db.embedding_count().unwrap(), 0);
3783        let stored_dim: i64 = db
3784            .conn
3785            .query_row(
3786                "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'",
3787                [],
3788                |row| row.get(0),
3789            )
3790            .unwrap();
3791        assert_eq!(stored_dim, 1024);
3792        // A successful wipe must also recreate symbol_vec at the new dim.
3793        // Without this assertion, an early return between the DROP and the
3794        // CREATE in reconcile_embedding_fingerprint would pass the count +
3795        // metadata checks above while leaving the DB unusable for RAG.
3796        assert!(
3797            symbol_vec_exists(&db.conn).unwrap(),
3798            "successful reconcile must recreate symbol_vec"
3799        );
3800    }
3801
3802    // ── Read-only attach tests (Phase 3) ──
3803
3804    #[test]
3805    fn test_open_readonly_succeeds_and_marks_read_only() {
3806        let dir = tempfile::TempDir::new().unwrap();
3807        let db_path = dir.path().join("test.db");
3808
3809        // Primary creates and writes a fingerprint.
3810        {
3811            let db = Database::open(&db_path, 384).unwrap();
3812            db.reconcile_embedding_fingerprint(&fp("local", "BGE-small-en-v1.5", 384))
3813                .unwrap();
3814            seed_embedding(&db, 384, "foo");
3815        }
3816
3817        // Reader attaches read-only.
3818        let reader = Database::open_readonly(&db_path).unwrap();
3819        assert!(reader.is_read_only(), "open_readonly must set the flag");
3820        let pinned = reader.pinned_attach().expect("read-only attach pins state");
3821        assert_eq!(pinned.schema_version, SCHEMA_VERSION);
3822        assert_eq!(
3823            pinned.embedding,
3824            Some(fp("local", "BGE-small-en-v1.5", 384))
3825        );
3826    }
3827
3828    #[test]
3829    fn test_open_readonly_can_query_existing_data() {
3830        let dir = tempfile::TempDir::new().unwrap();
3831        let db_path = dir.path().join("test.db");
3832
3833        {
3834            let db = Database::open(&db_path, 384).unwrap();
3835            let sym = Symbol::new(
3836                "callable",
3837                SymbolKind::Function,
3838                "a.py",
3839                1,
3840                10,
3841                0,
3842                100,
3843                None,
3844            );
3845            db.insert_symbol(&sym).unwrap();
3846        }
3847
3848        let reader = Database::open_readonly(&db_path).unwrap();
3849        let count: i64 = reader
3850            .conn
3851            .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
3852            .unwrap();
3853        assert_eq!(count, 1, "reader sees primary's data");
3854    }
3855
3856    #[test]
3857    fn test_open_readonly_refuses_writes() {
3858        // SQLITE_OPEN_READ_ONLY must turn any INSERT into SQLITE_READONLY at
3859        // runtime — defense-in-depth for the higher-level tool gating in
3860        // Phase 4.
3861        let dir = tempfile::TempDir::new().unwrap();
3862        let db_path = dir.path().join("test.db");
3863        {
3864            let _db = Database::open(&db_path, 384).unwrap();
3865        }
3866
3867        let reader = Database::open_readonly(&db_path).unwrap();
3868        let err = reader
3869            .conn
3870            .execute(
3871                "INSERT OR REPLACE INTO metadata (key, value) VALUES ('x', 'y')",
3872                [],
3873            )
3874            .unwrap_err();
3875        // The specific code is SQLITE_READONLY (8); rusqlite surfaces it as
3876        // SqliteFailure with the matching error code variant. We just check
3877        // that some error came back rather than match on the FFI integer.
3878        let msg = err.to_string();
3879        assert!(
3880            msg.contains("read") || msg.contains("readonly") || msg.contains("write"),
3881            "read-only DB write should fail with a read-only-flavored error, got: {msg}"
3882        );
3883    }
3884
3885    #[test]
3886    fn test_open_readonly_detects_schema_drift() {
3887        let dir = tempfile::TempDir::new().unwrap();
3888        let db_path = dir.path().join("test.db");
3889        {
3890            let db = Database::open(&db_path, 384).unwrap();
3891            // Simulate a future cartog: bump schema_version on disk.
3892            db.set_metadata("schema_version", "9999").unwrap();
3893        }
3894
3895        let err = Database::open_readonly(&db_path).unwrap_err();
3896        match err {
3897            DbError::SchemaDrift { expected, stored } => {
3898                assert_eq!(expected, SCHEMA_VERSION);
3899                assert_eq!(stored, 9999);
3900            }
3901            other => panic!("expected SchemaDrift, got {other:?}"),
3902        }
3903    }
3904
3905    #[test]
3906    fn test_open_readonly_does_not_run_migrations() {
3907        // After open_readonly returns, no PRAGMAs or writes should have
3908        // landed beyond what was there before. We test the visible
3909        // consequence: an existing user-set metadata key is unchanged.
3910        let dir = tempfile::TempDir::new().unwrap();
3911        let db_path = dir.path().join("test.db");
3912        {
3913            let db = Database::open(&db_path, 384).unwrap();
3914            db.set_metadata("user_marker", "untouched").unwrap();
3915        }
3916        let _reader = Database::open_readonly(&db_path).unwrap();
3917        // Re-open writable to verify the marker is still there and the
3918        // schema didn't get rewritten.
3919        let primary = Database::open(&db_path, 384).unwrap();
3920        assert_eq!(
3921            primary.get_metadata("user_marker").unwrap().as_deref(),
3922            Some("untouched")
3923        );
3924    }
3925
3926    #[test]
3927    fn test_open_default_is_not_read_only() {
3928        let dir = tempfile::TempDir::new().unwrap();
3929        let db_path = dir.path().join("test.db");
3930        let db = Database::open(&db_path, 384).unwrap();
3931        assert!(!db.is_read_only());
3932        assert!(db.pinned_attach().is_none());
3933    }
3934
3935    // ── Promotion path: open_existing_rw (Phase 5) ──
3936
3937    #[test]
3938    fn test_open_existing_rw_opens_writable_and_skips_migrations() {
3939        let dir = tempfile::TempDir::new().unwrap();
3940        let db_path = dir.path().join("test.db");
3941        // Materialize with a user-set metadata marker we can re-read.
3942        {
3943            let db = Database::open(&db_path, 384).unwrap();
3944            db.set_metadata("marker", "preserved").unwrap();
3945        }
3946
3947        let promoted = Database::open_existing_rw(&db_path).unwrap();
3948        assert!(!promoted.is_read_only(), "open_existing_rw is RW");
3949        assert!(promoted.pinned_attach().is_none(), "RW opens have no pin");
3950        // The marker survives (we didn't wipe anything).
3951        assert_eq!(
3952            promoted.get_metadata("marker").unwrap().as_deref(),
3953            Some("preserved")
3954        );
3955        // We can write — confirming it's a real RW handle.
3956        promoted.set_metadata("write_check", "ok").unwrap();
3957    }
3958
3959    #[test]
3960    fn test_open_existing_rw_detects_schema_drift() {
3961        let dir = tempfile::TempDir::new().unwrap();
3962        let db_path = dir.path().join("test.db");
3963        {
3964            let db = Database::open(&db_path, 384).unwrap();
3965            db.set_metadata("schema_version", "9999").unwrap();
3966        }
3967        let err = Database::open_existing_rw(&db_path).unwrap_err();
3968        match err {
3969            DbError::SchemaDrift { expected, stored } => {
3970                assert_eq!(expected, SCHEMA_VERSION);
3971                assert_eq!(stored, 9999);
3972            }
3973            other => panic!("expected SchemaDrift, got {other:?}"),
3974        }
3975    }
3976
3977    #[test]
3978    fn test_database_open_alone_does_not_change_fingerprint() {
3979        // Regression for the cartog rag search path: opening the DB (which
3980        // every CLI command does) must not touch the embedding fingerprint
3981        // unless reconcile_embedding_fingerprint is explicitly called.
3982        // Pre-fix, cmd_rag_search called reconcile on every invocation,
3983        // which could race a primary serve's writes if the user changed
3984        // provider in .cartog.toml since last index. After the fix,
3985        // cmd_rag_search opens RW but does NOT reconcile.
3986        //
3987        // This test asserts the invariant at the layer below: Database::open
3988        // does not, by itself, alter provider/model metadata. Combined with
3989        // the production code change (no reconcile call in cmd_rag_search),
3990        // a CLI search invocation cannot wipe symbol_vec.
3991        let dir = tempfile::TempDir::new().unwrap();
3992        let db_path = dir.path().join("test.db");
3993        let original_fp = fp("local", "BGE-small-en-v1.5", 384);
3994        {
3995            let db = Database::open(&db_path, 384).unwrap();
3996            db.reconcile_embedding_fingerprint(&original_fp).unwrap();
3997            seed_embedding(&db, 384, "guard");
3998        }
3999        // Re-open as cmd_rag_search would (RW, no reconcile). Same dim,
4000        // so handle_embedding_dimension early-returns; nothing rewrites.
4001        {
4002            let _db = Database::open(&db_path, 384).unwrap();
4003        }
4004        // Fingerprint and embeddings intact.
4005        let db = Database::open(&db_path, 384).unwrap();
4006        assert_eq!(
4007            db.get_metadata("embedding_provider").unwrap().as_deref(),
4008            Some("local")
4009        );
4010        assert_eq!(
4011            db.get_metadata("embedding_model").unwrap().as_deref(),
4012            Some("BGE-small-en-v1.5")
4013        );
4014        assert_eq!(db.embedding_count().unwrap(), 1);
4015    }
4016
4017    #[test]
4018    fn test_open_readonly_missing_schema_version_is_schema_drift() {
4019        // Regression: pre-fix, a metadata table without a schema_version
4020        // row surfaced as DbError::Sqlite(QueryReturnedNoRows) instead of
4021        // the actionable SchemaDrift. Callers (cartog serve) print
4022        // different messages for the two — drift is the right one ("the
4023        // primary upgraded cartog; restart this session"), the raw
4024        // rusqlite error is opaque.
4025        let dir = tempfile::TempDir::new().unwrap();
4026        let db_path = dir.path().join("test.db");
4027        // Create a DB with our schema, then delete the schema_version row.
4028        {
4029            let db = Database::open(&db_path, 384).unwrap();
4030            db.conn
4031                .execute("DELETE FROM metadata WHERE key = 'schema_version'", [])
4032                .unwrap();
4033        }
4034        let err = Database::open_readonly(&db_path).unwrap_err();
4035        match err {
4036            DbError::SchemaDrift { expected, stored } => {
4037                assert_eq!(expected, SCHEMA_VERSION);
4038                assert_eq!(stored, 0, "missing row should surface as stored=0");
4039            }
4040            other => panic!("expected SchemaDrift, got {other:?}"),
4041        }
4042    }
4043
4044    #[test]
4045    fn test_open_readonly_missing_metadata_table_is_schema_drift() {
4046        // Regression: a non-cartog SQLite file at the path (or a
4047        // partially-initialised DB where the `metadata` table is missing
4048        // entirely) used to surface as a raw rusqlite "no such table:
4049        // metadata" error instead of the actionable SchemaDrift. Fix:
4050        // read_schema_version catches that specific SqliteFailure and
4051        // returns stored=0.
4052        let dir = tempfile::TempDir::new().unwrap();
4053        let db_path = dir.path().join("test.db");
4054        // Build a SQLite file that's NOT a cartog DB: empty schema.
4055        {
4056            let conn = Connection::open(&db_path).unwrap();
4057            conn.execute_batch("CREATE TABLE unrelated (x INTEGER);")
4058                .unwrap();
4059        }
4060        let err = Database::open_readonly(&db_path).unwrap_err();
4061        match err {
4062            DbError::SchemaDrift { expected, stored } => {
4063                assert_eq!(expected, SCHEMA_VERSION);
4064                assert_eq!(stored, 0, "missing metadata table should be stored=0");
4065            }
4066            other => panic!("expected SchemaDrift, got {other:?}"),
4067        }
4068    }
4069
4070    #[test]
4071    fn test_open_existing_rw_missing_schema_version_is_schema_drift() {
4072        let dir = tempfile::TempDir::new().unwrap();
4073        let db_path = dir.path().join("test.db");
4074        {
4075            let db = Database::open(&db_path, 384).unwrap();
4076            db.conn
4077                .execute("DELETE FROM metadata WHERE key = 'schema_version'", [])
4078                .unwrap();
4079        }
4080        let err = Database::open_existing_rw(&db_path).unwrap_err();
4081        match err {
4082            DbError::SchemaDrift { expected, stored } => {
4083                assert_eq!(expected, SCHEMA_VERSION);
4084                assert_eq!(stored, 0);
4085            }
4086            other => panic!("expected SchemaDrift, got {other:?}"),
4087        }
4088    }
4089
4090    #[test]
4091    fn test_reconcile_rebuilds_when_metadata_matches_but_symbol_vec_missing() {
4092        // Defensive regression: if `symbol_vec` is missing for any reason
4093        // (external corruption, pre-C4 cartog that crashed mid-migration)
4094        // but metadata still claims the matching fingerprint, the fast-
4095        // path early return previously skipped the rebuild, leaving the
4096        // DB stuck. After the fix, the symbol_vec_exists() check forces
4097        // a rebuild.
4098        let dir = tempfile::TempDir::new().unwrap();
4099        let db_path = dir.path().join("test.db");
4100        let f = fp("local", "BGE-small-en-v1.5", 384);
4101
4102        // 1. Establish a normal state.
4103        {
4104            let db = Database::open(&db_path, 384).unwrap();
4105            db.reconcile_embedding_fingerprint(&f).unwrap();
4106        }
4107
4108        // 2. Drop the vector table out-of-band, simulating corruption.
4109        {
4110            let db = Database::open(&db_path, 384).unwrap();
4111            db.conn
4112                .execute("DROP TABLE IF EXISTS symbol_vec", [])
4113                .unwrap();
4114            // Metadata unchanged: still claims (local, BGE-small-en-v1.5, 384).
4115            assert_eq!(
4116                db.get_metadata("embedding_dimension").unwrap().as_deref(),
4117                Some("384")
4118            );
4119        }
4120
4121        // 3. Re-reconcile with the same fingerprint. Pre-fix: early-return
4122        //    skipped rebuild → symbol_vec stayed missing forever. Post-fix:
4123        //    the symbol_vec_exists() check forces the rebuild.
4124        {
4125            let db = Database::open(&db_path, 384).unwrap();
4126            db.reconcile_embedding_fingerprint(&f).unwrap();
4127            let exists: bool = db
4128                .conn
4129                .query_row(
4130                    "SELECT 1 FROM sqlite_master WHERE name='symbol_vec'",
4131                    [],
4132                    |row| row.get::<_, i64>(0),
4133                )
4134                .optional()
4135                .unwrap()
4136                .is_some();
4137            assert!(
4138                exists,
4139                "reconcile must rebuild symbol_vec when missing, even on metadata match"
4140            );
4141        }
4142    }
4143
4144    #[test]
4145    fn test_handle_embedding_dimension_rebuilds_when_symbol_vec_missing() {
4146        // Same defensive guarantee for the lower-level handle_embedding_dimension
4147        // fast-path. Open a DB, drop symbol_vec, re-open: the table must come
4148        // back even though stored_dim == requested_dim.
4149        let dir = tempfile::TempDir::new().unwrap();
4150        let db_path = dir.path().join("test.db");
4151        {
4152            let db = Database::open(&db_path, 384).unwrap();
4153            db.conn
4154                .execute("DROP TABLE IF EXISTS symbol_vec", [])
4155                .unwrap();
4156        }
4157        let db = Database::open(&db_path, 384).unwrap();
4158        let exists: bool = db
4159            .conn
4160            .query_row(
4161                "SELECT 1 FROM sqlite_master WHERE name='symbol_vec'",
4162                [],
4163                |row| row.get::<_, i64>(0),
4164            )
4165            .optional()
4166            .unwrap()
4167            .is_some();
4168        assert!(
4169            exists,
4170            "Database::open must rebuild symbol_vec when missing, even on metadata match"
4171        );
4172    }
4173
4174    #[test]
4175    fn test_reconcile_fingerprint_rolls_back_on_midsequence_failure() {
4176        // Regression: pre-fix, each metadata write in
4177        // reconcile_embedding_fingerprint ran outside any transaction.
4178        // If the busy-retry on a later write exhausted (or any other
4179        // failure), the DB was left with partial state — e.g.
4180        // symbol_vec dropped, provider rewritten, dimension stale. The
4181        // next open would see (stored_dim != fp.dimension) → "wipe and
4182        // rebuild" but the embeddings would already be gone, and the
4183        // primary writer would silently keep operating against the
4184        // damaged DB.
4185        //
4186        // With the transaction wrapper, a mid-sequence failure rolls
4187        // back the entire reconcile. We exercise this by capping
4188        // max_page_count so a write in the middle of the sequence
4189        // fails with SQLITE_FULL.
4190        let dir = tempfile::TempDir::new().unwrap();
4191        let db_path = dir.path().join("test.db");
4192
4193        // 1. Establish a known state with our own embedding rows.
4194        let initial_fp = fp("local", "BGE-small-en-v1.5", 384);
4195        {
4196            let db = Database::open(&db_path, 384).unwrap();
4197            db.reconcile_embedding_fingerprint(&initial_fp).unwrap();
4198            seed_embedding(&db, 384, "seed");
4199        }
4200
4201        // 2. Force a deterministic mid-sequence failure via the
4202        //    RECONCILE_FAIL_AFTER_MODEL fault-injection hook (gated by
4203        //    #[cfg(test)]). Page-cap tricks don't reliably trigger
4204        //    SQLITE_FULL: SQLite reuses freed pages after DROP TABLE.
4205        let new_fp = fp("ollama", "nomic-embed-text-v2", 384);
4206        let outcome = {
4207            let db = Database::open(&db_path, 384).unwrap();
4208            RECONCILE_FAIL_AFTER_MODEL.with(|b| b.store(true, std::sync::atomic::Ordering::SeqCst));
4209            db.reconcile_embedding_fingerprint(&new_fp)
4210        };
4211        assert!(outcome.is_err(), "injected SQLITE_FULL must surface as Err");
4212
4213        // 3. Failure path: the DB on disk must still reflect the INITIAL
4214        //    fingerprint, not a partial state.
4215        let post = Database::open(&db_path, 384).unwrap();
4216        let stored_provider = post.get_metadata("embedding_provider").unwrap();
4217        let stored_model = post.get_metadata("embedding_model").unwrap();
4218        let stored_dim_str = post.get_metadata("embedding_dimension").unwrap();
4219        let symbol_vec_exists = post
4220            .conn
4221            .query_row(
4222                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='symbol_vec'",
4223                [],
4224                |row| row.get::<_, i64>(0),
4225            )
4226            .optional()
4227            .unwrap()
4228            .is_some();
4229        assert_eq!(
4230            stored_provider.as_deref(),
4231            Some("local"),
4232            "failed reconcile must roll back provider"
4233        );
4234        assert_eq!(
4235            stored_model.as_deref(),
4236            Some("BGE-small-en-v1.5"),
4237            "failed reconcile must roll back model"
4238        );
4239        assert_eq!(
4240            stored_dim_str.as_deref(),
4241            Some("384"),
4242            "failed reconcile must roll back dimension"
4243        );
4244        assert!(
4245            symbol_vec_exists,
4246            "failed reconcile must roll back symbol_vec drop"
4247        );
4248        assert_eq!(
4249            post.embedding_count().unwrap(),
4250            1,
4251            "failed reconcile must roll back the symbol_embedding_map DELETE"
4252        );
4253    }
4254
4255    #[test]
4256    fn test_default_embedding_dim_constant() {
4257        assert_eq!(DEFAULT_EMBEDDING_DIM, 384);
4258    }
4259
4260    #[test]
4261    fn test_destructive_migration_creates_backup() {
4262        // Build a legacy v2 database file: pre-hash-columns, with indexed data.
4263        let tmp = tempfile::tempdir().unwrap();
4264        let db_path = tmp.path().join("legacy.db");
4265
4266        {
4267            register_sqlite_vec();
4268            let conn = Connection::open(&db_path).unwrap();
4269            // Minimal legacy schema that the wipe code will operate on.
4270            conn.execute_batch(
4271                "CREATE TABLE symbols (
4272                    id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
4273                    start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
4274                    parent_id TEXT, signature TEXT, visibility TEXT,
4275                    is_async BOOLEAN, docstring TEXT, in_degree INTEGER DEFAULT 0
4276                 );
4277                 CREATE TABLE edges (
4278                    id INTEGER PRIMARY KEY AUTOINCREMENT, source_id TEXT, target_name TEXT,
4279                    target_id TEXT, kind TEXT, file_path TEXT, line INTEGER
4280                 );
4281                 CREATE TABLE files (path TEXT PRIMARY KEY, last_modified REAL, hash TEXT,
4282                                     language TEXT, num_symbols INTEGER);
4283                 CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
4284                 INSERT INTO symbols (id, name, kind, file_path) VALUES ('s1', 'foo', 'function', 'a.py');
4285                 INSERT INTO metadata (key, value) VALUES ('schema_version', '2');",
4286            )
4287            .unwrap();
4288        }
4289
4290        // Opening via the real entry point should back up the legacy file before wiping.
4291        let _db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
4292
4293        let backups: Vec<_> = std::fs::read_dir(tmp.path())
4294            .unwrap()
4295            .filter_map(|e| e.ok())
4296            .filter(|e| {
4297                e.file_name()
4298                    .to_string_lossy()
4299                    .starts_with("legacy.db.pre-v")
4300            })
4301            .collect();
4302        assert_eq!(
4303            backups.len(),
4304            1,
4305            "expected exactly one pre-migration backup, found {}",
4306            backups.len()
4307        );
4308    }
4309
4310    #[test]
4311    fn test_no_backup_for_fresh_database() {
4312        // A fresh DB should never produce a backup file.
4313        let tmp = tempfile::tempdir().unwrap();
4314        let db_path = tmp.path().join("fresh.db");
4315        let _db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
4316
4317        let backups: Vec<_> = std::fs::read_dir(tmp.path())
4318            .unwrap()
4319            .filter_map(|e| e.ok())
4320            .filter(|e| e.file_name().to_string_lossy().contains(".pre-v"))
4321            .collect();
4322        assert!(
4323            backups.is_empty(),
4324            "fresh DB should not create a backup file"
4325        );
4326    }
4327
4328    #[test]
4329    fn fresh_db_stamps_version_without_running_ladder() {
4330        // A fresh DB takes the fast-path stamp and skips the destructive v2→3
4331        // wipe. Regression guard for the duplicate-column WARN: the ladder must
4332        // not re-fire the additive ALTERs against the bootstrapped v6 shape.
4333        let tmp = tempfile::tempdir().unwrap();
4334        let db_path = tmp.path().join("fresh.db");
4335        let db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
4336
4337        // The destructive branch deletes the 'last_commit' row; the fast path
4338        // never enters it, so a marker written before re-open survives.
4339        db.set_metadata("last_commit", "deadbeef").unwrap();
4340        drop(db);
4341        let db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
4342        let last_commit: Option<String> = db
4343            .conn
4344            .query_row(
4345                "SELECT value FROM metadata WHERE key = 'last_commit'",
4346                [],
4347                |r| r.get(0),
4348            )
4349            .optional()
4350            .unwrap();
4351        assert_eq!(
4352            last_commit,
4353            Some("deadbeef".to_string()),
4354            "fresh re-open must not run the v2→3 wipe"
4355        );
4356
4357        let version: String = db
4358            .conn
4359            .query_row(
4360                "SELECT value FROM metadata WHERE key = 'schema_version'",
4361                [],
4362                |r| r.get(0),
4363            )
4364            .unwrap();
4365        assert_eq!(version, SCHEMA_VERSION.to_string());
4366    }
4367
4368    #[test]
4369    fn populated_v1_db_runs_full_ladder_to_current() {
4370        // Negative guard for the fresh-DB fast path: a real pre-versioning v1 DB
4371        // (no schema_version row, narrow v1 columns, but SEEDED with rows) must
4372        // NOT be misclassified as fresh. It runs the full v1→current ladder
4373        // including the intentional v2→3 stable-id wipe and lands at the current
4374        // schema version with every later column present.
4375        let tmp = tempfile::tempdir().unwrap();
4376        let path = tmp.path().join("v1.sqlite");
4377        {
4378            let conn = Connection::open(&path).unwrap();
4379            // True v1 shape: symbols end at docstring, edges end at line, no
4380            // query_log, no schema_version row.
4381            conn.execute_batch(
4382                "CREATE TABLE symbols (
4383                    id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
4384                    start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
4385                    parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN, docstring TEXT);
4386                 CREATE TABLE edges (
4387                    id INTEGER PRIMARY KEY AUTOINCREMENT,
4388                    source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
4389                    kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER);
4390                 CREATE TABLE files (path TEXT PRIMARY KEY);
4391                 CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
4392                 INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
4393                 INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
4394                   VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1);",
4395            )
4396            .unwrap();
4397        }
4398
4399        let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
4400
4401        // Ladder reached the current version.
4402        let version: String = db
4403            .conn
4404            .query_row(
4405                "SELECT value FROM metadata WHERE key = 'schema_version'",
4406                [],
4407                |r| r.get(0),
4408            )
4409            .unwrap();
4410        assert_eq!(version, SCHEMA_VERSION.to_string());
4411
4412        // v5→6 column exists (the fast path would have skipped this ALTER).
4413        assert!(
4414            db.conn
4415                .prepare("SELECT resolution_source FROM edges LIMIT 0")
4416                .is_ok(),
4417            "resolution_source must be added by the real upgrade"
4418        );
4419
4420        // The intentional v2→3 wipe cleared the seeded rows for the stable-id
4421        // rebuild — proving the fresh-DB fast path was NOT taken.
4422        let symbol_count: i64 = db
4423            .conn
4424            .query_row("SELECT COUNT(*) FROM symbols", [], |r| r.get(0))
4425            .unwrap();
4426        assert_eq!(symbol_count, 0, "v2→3 wipe must run for a populated v1 DB");
4427    }
4428
4429    #[test]
4430    fn test_busy_timeout_pragma_is_set() {
4431        let tmp = tempfile::tempdir().unwrap();
4432        let db_path = tmp.path().join("timeout.db");
4433        let db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
4434
4435        let timeout: i64 = db
4436            .conn
4437            .query_row("PRAGMA busy_timeout;", [], |row| row.get(0))
4438            .unwrap();
4439        assert_eq!(timeout, BUSY_TIMEOUT_MS as i64);
4440    }
4441
4442    #[test]
4443    fn test_busy_timeout_makes_second_writer_retry_instead_of_aborting() {
4444        // Regression for #42. A second writer blocked by a held write lock
4445        // should *wait* (bounded by busy_timeout) rather than abort instantly.
4446        // Proven deterministically: against the same held lock, a connection
4447        // with busy_timeout=0 fails immediately, one with a non-zero timeout
4448        // only fails after waiting that long. No inter-thread timing race.
4449        let tmp = tempfile::tempdir().unwrap();
4450        let db_path = tmp.path().join("concurrent.db");
4451        let _ = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
4452
4453        // Holder keeps an exclusive write lock for the whole test.
4454        let holder = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
4455        holder
4456            .conn
4457            .execute_batch("BEGIN IMMEDIATE; INSERT INTO metadata (key, value) VALUES ('a', '1');")
4458            .unwrap();
4459
4460        let attempt_write = |timeout_ms: u32| -> std::time::Duration {
4461            let conn = Connection::open(&db_path).unwrap();
4462            conn.execute_batch(&format!("PRAGMA busy_timeout={timeout_ms};"))
4463                .unwrap();
4464            let start = std::time::Instant::now();
4465            let res = conn.execute("INSERT INTO metadata (key, value) VALUES ('b', '2');", []);
4466            assert!(res.is_err(), "write must fail while the lock is held");
4467            start.elapsed()
4468        };
4469
4470        // busy_timeout=0: SQLite aborts immediately, no retry.
4471        assert!(
4472            attempt_write(0) < std::time::Duration::from_millis(150),
4473            "with busy_timeout=0 the writer must fail immediately"
4474        );
4475        // busy_timeout=300ms: SQLite retries for the full window before failing.
4476        assert!(
4477            attempt_write(300) >= std::time::Duration::from_millis(250),
4478            "with a non-zero busy_timeout the writer must retry, not abort"
4479        );
4480
4481        holder.conn.execute_batch("COMMIT;").unwrap();
4482    }
4483
4484    // ── Typed error surface ──
4485
4486    #[test]
4487    fn test_db_error_wraps_into_anyhow() {
4488        // Callers that keep using anyhow::Result must still compose with DbError
4489        // transparently via `?`, thanks to the std::error::Error blanket impl.
4490        fn downstream() -> anyhow::Result<()> {
4491            let _db = Database::open_memory()?; // returns DbResult<Database>
4492            Ok(())
4493        }
4494        downstream().unwrap();
4495    }
4496
4497    #[test]
4498    fn test_db_error_open_variant_has_path() {
4499        // Give Database::open a path inside a non-writable location to force
4500        // a failure. We accept either PrepareDir (mkdir failed on the parent)
4501        // or Open (SQLite refused), since the failure point depends on the
4502        // platform's handling of `/dev/null/…`.
4503        let bad_path = std::path::PathBuf::from("/dev/null/definitely/not/a/db.sqlite");
4504        let err = Database::open(&bad_path, DEFAULT_EMBEDDING_DIM).unwrap_err();
4505        match err {
4506            DbError::Open { path, .. } => assert_eq!(path, bad_path),
4507            DbError::PrepareDir { path, .. } => {
4508                assert_eq!(path, bad_path.parent().unwrap());
4509            }
4510            other => panic!("expected DbError::Open or PrepareDir, got {other:?}"),
4511        }
4512    }
4513
4514    // ── Phase 3 atomicity: indexing transaction primitive ──
4515
4516    /// Build a minimal valid Symbol for transactional tests.
4517    fn tx_test_symbol(id: &str, file: &str) -> Symbol {
4518        Symbol {
4519            id: id.to_string(),
4520            name: id.to_string(),
4521            kind: SymbolKind::Function,
4522            file_path: file.to_string(),
4523            start_line: 1,
4524            end_line: 1,
4525            start_byte: 0,
4526            end_byte: 0,
4527            parent_id: None,
4528            signature: None,
4529            visibility: Visibility::Public,
4530            is_async: false,
4531            docstring: None,
4532            in_degree: 0,
4533            content_hash: Some("h".to_string()),
4534            subtree_hash: Some("s".to_string()),
4535        }
4536    }
4537
4538    #[test]
4539    fn test_indexing_tx_commit_persists_writes() {
4540        // Sanity: writes through *_in_tx variants under begin_indexing_tx
4541        // must persist after commit().
4542        let db = Database::open_memory().unwrap();
4543        let sym = tx_test_symbol("a.py:function:foo", "a.py");
4544
4545        let tx = db.begin_indexing_tx().unwrap();
4546        db.insert_symbols_in_tx(std::slice::from_ref(&sym)).unwrap();
4547        tx.commit().unwrap();
4548
4549        let count: i64 = db
4550            .conn
4551            .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
4552            .unwrap();
4553        assert_eq!(count, 1, "committed write must persist");
4554    }
4555
4556    #[test]
4557    fn test_indexing_tx_rollback_drops_writes() {
4558        // Phase 3 atomicity: writes through *_in_tx variants must roll back
4559        // when the transaction is dropped without commit() — e.g. an `?`
4560        // bubbled up an error mid-pipeline, or a panic unwound the stack.
4561        let db = Database::open_memory().unwrap();
4562        let sym = tx_test_symbol("a.py:function:foo", "a.py");
4563
4564        {
4565            let _tx = db.begin_indexing_tx().unwrap();
4566            db.insert_symbols_in_tx(std::slice::from_ref(&sym)).unwrap();
4567            // _tx dropped here without commit() — must roll back.
4568        }
4569
4570        let count: i64 = db
4571            .conn
4572            .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
4573            .unwrap();
4574        assert_eq!(
4575            count, 0,
4576            "writes must roll back when the indexing transaction is dropped without commit"
4577        );
4578    }
4579
4580    #[test]
4581    fn test_indexing_tx_partial_failure_rolls_back_full_pipeline() {
4582        // Phase 3 atomicity, end-to-end shape: simulate a multi-step pipeline
4583        // where step N fails after steps 1..N-1 already wrote. Without an
4584        // outer transaction, the prior writes would persist (the original
4585        // bug). With begin_indexing_tx wrapping the sequence, dropping `tx`
4586        // on the error path rolls every prior write back.
4587        let db = Database::open_memory().unwrap();
4588
4589        // Seed one pre-existing symbol so we can verify it survives the
4590        // rollback path (a regression here would also wipe pre-existing
4591        // data, which is the worst flavor of the bug).
4592        let pre = tx_test_symbol("pre.py:function:keep", "pre.py");
4593        db.insert_symbols(std::slice::from_ref(&pre)).unwrap();
4594
4595        // Run a "Phase 3 lookalike" that fails mid-way. The early `bail!`
4596        // means tx.commit() is unreachable; dropping `tx` on the error
4597        // path is exactly what we want to exercise.
4598        let result: Result<()> = (|| {
4599            let _tx = db.begin_indexing_tx()?;
4600            // Write a first batch.
4601            let batch1 = vec![tx_test_symbol("a.py:function:foo", "a.py")];
4602            db.insert_symbols_in_tx(&batch1)?;
4603
4604            // Simulate a downstream failure after a successful early write.
4605            anyhow::bail!("simulated mid-pipeline failure");
4606        })();
4607        assert!(result.is_err(), "the pipeline must propagate its error");
4608
4609        // The seed survives, the partial write does not.
4610        let names: Vec<String> = db
4611            .conn
4612            .prepare("SELECT id FROM symbols ORDER BY id")
4613            .unwrap()
4614            .query_map([], |row| row.get(0))
4615            .unwrap()
4616            .map(|r| r.unwrap())
4617            .collect();
4618        assert_eq!(
4619            names,
4620            vec!["pre.py:function:keep"],
4621            "pre-existing rows must survive; the partial write must roll back"
4622        );
4623    }
4624
4625    #[test]
4626    fn test_public_wrapper_still_self_commits() {
4627        // The public, non-`_in_tx` API must remain usable on its own —
4628        // existing callers (mcp server, watch, search, etc.) don't open
4629        // transactions and must keep working unchanged.
4630        let db = Database::open_memory().unwrap();
4631        let sym = tx_test_symbol("a.py:function:foo", "a.py");
4632
4633        // No outer transaction; the wrapper opens and commits its own.
4634        db.insert_symbols(std::slice::from_ref(&sym)).unwrap();
4635
4636        let count: i64 = db
4637            .conn
4638            .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
4639            .unwrap();
4640        assert_eq!(count, 1, "public wrapper must persist without an outer tx");
4641    }
4642
4643    #[test]
4644    fn test_partial_pipeline_without_outer_tx_persists_writes() {
4645        // Discriminator test: documents the *old* behavior. Without an
4646        // outer transaction, an error after a successful self-committing
4647        // write leaves that write persisted. This is exactly the bug the
4648        // outer transaction in `index_directory` fixes. If this assertion
4649        // ever flips, it means someone changed the public wrapper's
4650        // semantics — and `test_indexing_tx_partial_failure_rolls_back_full_pipeline`
4651        // would no longer be discriminating between buggy and fixed states.
4652        let db = Database::open_memory().unwrap();
4653
4654        let result: Result<()> = (|| {
4655            // Each call commits independently.
4656            let batch1 = vec![tx_test_symbol("a.py:function:foo", "a.py")];
4657            db.insert_symbols(&batch1)?;
4658            anyhow::bail!("simulated mid-pipeline failure");
4659        })();
4660        assert!(result.is_err());
4661
4662        let count: i64 = db
4663            .conn
4664            .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
4665            .unwrap();
4666        assert_eq!(
4667            count, 1,
4668            "without an outer transaction, an early write persists despite a later error"
4669        );
4670    }
4671
4672    // ── resolution_state (edge marker) tests ──
4673
4674    fn resolution_state_of(db: &Database, edge_id: i64) -> i64 {
4675        db.conn
4676            .query_row(
4677                "SELECT resolution_state FROM edges WHERE id = ?1",
4678                params![edge_id],
4679                |row| row.get(0),
4680            )
4681            .unwrap()
4682    }
4683
4684    fn resolution_source_of(db: &Database, edge_id: i64) -> Option<String> {
4685        db.conn
4686            .query_row(
4687                "SELECT resolution_source FROM edges WHERE id = ?1",
4688                params![edge_id],
4689                |row| row.get(0),
4690            )
4691            .unwrap()
4692    }
4693
4694    fn insert_test_edge(db: &Database, target_name: &str) -> i64 {
4695        let sym = test_symbol("src", SymbolKind::Function, "a.py", 1);
4696        db.insert_symbols(std::slice::from_ref(&sym)).unwrap();
4697        let edge = Edge::new(&sym.id, target_name, EdgeKind::Calls, "a.py", 1);
4698        db.insert_edge(&edge).unwrap();
4699        db.conn.last_insert_rowid()
4700    }
4701
4702    #[test]
4703    fn test_new_edge_has_default_state_zero() {
4704        let db = Database::open_memory().unwrap();
4705        let id = insert_test_edge(&db, "missing_target");
4706        assert_eq!(resolution_state_of(&db, id), 0);
4707    }
4708
4709    #[test]
4710    fn test_update_edge_target_flips_state_to_one() {
4711        let db = Database::open_memory().unwrap();
4712        let id = insert_test_edge(&db, "anything");
4713        db.update_edge_target(id, "some:symbol:id").unwrap();
4714        assert_eq!(resolution_state_of(&db, id), 1);
4715    }
4716
4717    #[test]
4718    fn test_mark_edge_unresolvable_sets_state_to_two() {
4719        let db = Database::open_memory().unwrap();
4720        let id = insert_test_edge(&db, "anything");
4721        db.mark_edge_unresolvable(id).unwrap();
4722        assert_eq!(resolution_state_of(&db, id), 2);
4723    }
4724
4725    #[test]
4726    fn test_unresolved_edges_excludes_state_two() {
4727        let db = Database::open_memory().unwrap();
4728        let _unresolved = insert_test_edge(&db, "still_unresolved");
4729        let burned = insert_test_edge(&db, "burned");
4730        db.mark_edge_unresolvable(burned).unwrap();
4731
4732        let edges = db.unresolved_edges().unwrap();
4733        let names: Vec<&str> = edges.iter().map(|e| e.target_name.as_str()).collect();
4734        assert!(names.contains(&"still_unresolved"));
4735        assert!(!names.contains(&"burned"));
4736    }
4737
4738    #[test]
4739    fn test_reset_unresolvable_for_names_targets_only_matching() {
4740        let db = Database::open_memory().unwrap();
4741        let burned_foo = insert_test_edge(&db, "foo");
4742        let burned_bar = insert_test_edge(&db, "bar");
4743        db.mark_edge_unresolvable(burned_foo).unwrap();
4744        db.mark_edge_unresolvable(burned_bar).unwrap();
4745
4746        let reopened = db
4747            .reset_unresolvable_for_names(&["foo".to_string()])
4748            .unwrap();
4749        assert_eq!(reopened, 1);
4750        assert_eq!(resolution_state_of(&db, burned_foo), 0);
4751        assert_eq!(resolution_state_of(&db, burned_bar), 2);
4752    }
4753
4754    #[test]
4755    fn test_reset_unresolvable_for_names_empty_is_noop() {
4756        let db = Database::open_memory().unwrap();
4757        let n = db.reset_unresolvable_for_names(&[]).unwrap();
4758        assert_eq!(n, 0);
4759    }
4760
4761    #[test]
4762    fn test_reset_unresolvable_for_names_does_not_touch_state_zero_or_one() {
4763        // The reset reopens state {2, 3} → state=0. Resolved (state=1) and
4764        // already-open (state=0) edges with matching names must be left alone.
4765        let db = Database::open_memory().unwrap();
4766        let still_open = insert_test_edge(&db, "foo"); // state=0
4767        let already_resolved = insert_test_edge(&db, "foo");
4768        db.update_edge_target(already_resolved, "some:id").unwrap(); // state=1
4769
4770        db.reset_unresolvable_for_names(&["foo".to_string()])
4771            .unwrap();
4772        assert_eq!(resolution_state_of(&db, still_open), 0);
4773        assert_eq!(resolution_state_of(&db, already_resolved), 1);
4774    }
4775
4776    #[test]
4777    fn test_mark_edge_external_sets_state_to_three() {
4778        let db = Database::open_memory().unwrap();
4779        let id = insert_test_edge(&db, "anything");
4780        db.mark_edge_external(id).unwrap();
4781        assert_eq!(resolution_state_of(&db, id), 3);
4782        assert_eq!(db.edge_resolution_state(id).unwrap(), 3);
4783    }
4784
4785    #[test]
4786    fn test_unresolved_edges_excludes_state_three() {
4787        // External (state=3) edges must be skipped by the LSP retry loop, same
4788        // as state=2 — otherwise we re-query dep targets on every dirty run.
4789        let db = Database::open_memory().unwrap();
4790        let _open = insert_test_edge(&db, "still_open");
4791        let ext = insert_test_edge(&db, "external_dep");
4792        db.mark_edge_external(ext).unwrap();
4793
4794        let edges = db.unresolved_edges().unwrap();
4795        let names: Vec<&str> = edges.iter().map(|e| e.target_name.as_str()).collect();
4796        assert!(names.contains(&"still_open"));
4797        assert!(!names.contains(&"external_dep"));
4798    }
4799
4800    #[test]
4801    fn test_reset_all_unresolvable_resets_state_two_and_three() {
4802        // `cartog index --force` must clear BOTH definitive markers (2 and 3)
4803        // so a forced re-index honors the "retry everything" contract.
4804        let db = Database::open_memory().unwrap();
4805        let burned = insert_test_edge(&db, "burned");
4806        let external = insert_test_edge(&db, "external");
4807        db.mark_edge_unresolvable(burned).unwrap();
4808        db.mark_edge_external(external).unwrap();
4809
4810        let reset = db.reset_all_unresolvable().unwrap();
4811        assert_eq!(reset, 2);
4812        assert_eq!(resolution_state_of(&db, burned), 0);
4813        assert_eq!(resolution_state_of(&db, external), 0);
4814    }
4815
4816    #[test]
4817    fn test_reset_unresolvable_for_names_reopens_state_three() {
4818        // External edges must also reopen when a matching symbol is added —
4819        // this is the "vendored dependency in-tree" path.
4820        let db = Database::open_memory().unwrap();
4821        let ext_foo = insert_test_edge(&db, "foo");
4822        let ext_bar = insert_test_edge(&db, "bar");
4823        db.mark_edge_external(ext_foo).unwrap();
4824        db.mark_edge_external(ext_bar).unwrap();
4825
4826        let reopened = db
4827            .reset_unresolvable_for_names(&["foo".to_string()])
4828            .unwrap();
4829        assert_eq!(reopened, 1);
4830        assert_eq!(resolution_state_of(&db, ext_foo), 0);
4831        assert_eq!(resolution_state_of(&db, ext_bar), 3);
4832    }
4833
4834    // ── state=4 (heuristic-exhausted) tests ──
4835
4836    #[test]
4837    fn test_mark_heuristic_exhausted_seals_unresolved_state_zero() {
4838        // Edges the heuristic couldn't resolve (state=0, target NULL) flip to
4839        // state=4 so the next re-index's resolution scan skips them.
4840        let db = Database::open_memory().unwrap();
4841        let unresolved = insert_test_edge(&db, "nowhere");
4842        let resolved = insert_test_edge(&db, "somewhere");
4843        db.update_edge_target(resolved, "some:id").unwrap();
4844
4845        let marked = db.mark_heuristic_exhausted_in_tx().unwrap();
4846        assert_eq!(marked, 1);
4847        assert_eq!(resolution_state_of(&db, unresolved), 4);
4848        assert_eq!(resolution_state_of(&db, resolved), 1, "resolved untouched");
4849    }
4850
4851    #[test]
4852    fn test_count_edges_in_state_buckets_by_state() {
4853        let db = Database::open_memory().unwrap();
4854        let resolved = insert_test_edge(&db, "somewhere");
4855        db.update_edge_target(resolved, "some:id").unwrap();
4856        let burned = insert_test_edge(&db, "burned");
4857        db.mark_edge_unresolvable(burned).unwrap();
4858
4859        assert_eq!(db.count_edges_in_state(0).unwrap(), 0);
4860        assert_eq!(db.count_edges_in_state(1).unwrap(), 1);
4861        assert_eq!(db.count_edges_in_state(2).unwrap(), 1);
4862    }
4863
4864    #[test]
4865    fn test_has_heuristic_exhausted_tracks_state_four() {
4866        let db = Database::open_memory().unwrap();
4867        let _edge = insert_test_edge(&db, "nowhere");
4868        assert!(!db.has_heuristic_exhausted().unwrap(), "state 0 not sealed");
4869        db.mark_heuristic_exhausted_in_tx().unwrap();
4870        assert!(db.has_heuristic_exhausted().unwrap());
4871    }
4872
4873    #[test]
4874    fn test_resolve_edges_skips_heuristic_exhausted_state_four() {
4875        // The state=0-only scan in resolve_edges_pass must not re-walk sealed
4876        // state=4 edges — this is the watch-mode amplification guard (#109).
4877        let db = Database::open_memory().unwrap();
4878        let eid = insert_test_edge(&db, "nowhere");
4879        db.mark_heuristic_exhausted_in_tx().unwrap();
4880        assert_eq!(resolution_state_of(&db, eid), 4);
4881
4882        // A fresh resolve pass finds nothing to do and leaves the seal intact.
4883        let resolved = db.resolve_edges().unwrap();
4884        assert_eq!(resolved, 0);
4885        assert_eq!(resolution_state_of(&db, eid), 4);
4886    }
4887
4888    #[test]
4889    fn test_unresolved_edges_excludes_state_four() {
4890        // The LSP retry loop must skip state=4 too, same as {2, 3}. The blanket
4891        // mark seals every open edge, so insert the still-open one afterward.
4892        let db = Database::open_memory().unwrap();
4893        let exhausted = insert_test_edge(&db, "exhausted");
4894        db.mark_heuristic_exhausted_in_tx().unwrap();
4895        let _open = insert_test_edge(&db, "still_open");
4896
4897        let edges = db.unresolved_edges().unwrap();
4898        let names: Vec<&str> = edges.iter().map(|e| e.target_name.as_str()).collect();
4899        assert!(names.contains(&"still_open"));
4900        assert!(!names.contains(&"exhausted"));
4901        let _ = exhausted;
4902    }
4903
4904    #[test]
4905    fn test_reopen_heuristic_exhausted_resets_only_state_four() {
4906        // Before an LSP-enabled reindex, state=4 → 0, but genuine LSP verdicts
4907        // (state {2, 3}) stay sealed.
4908        let db = Database::open_memory().unwrap();
4909        let exhausted = insert_test_edge(&db, "exhausted");
4910        db.mark_heuristic_exhausted_in_tx().unwrap();
4911        let burned = insert_test_edge(&db, "burned");
4912        db.mark_edge_unresolvable(burned).unwrap();
4913        let external = insert_test_edge(&db, "external");
4914        db.mark_edge_external(external).unwrap();
4915
4916        let reopened = db.reopen_heuristic_exhausted().unwrap();
4917        assert_eq!(reopened, 1);
4918        assert_eq!(resolution_state_of(&db, exhausted), 0);
4919        assert_eq!(resolution_state_of(&db, burned), 2, "LSP verdict sealed");
4920        assert_eq!(resolution_state_of(&db, external), 3, "LSP verdict sealed");
4921    }
4922
4923    #[test]
4924    fn test_reset_all_unresolvable_also_resets_state_four() {
4925        // --force must clear state=4 alongside {2, 3}.
4926        let db = Database::open_memory().unwrap();
4927        let exhausted = insert_test_edge(&db, "exhausted");
4928        db.mark_heuristic_exhausted_in_tx().unwrap();
4929        let burned = insert_test_edge(&db, "burned");
4930        db.mark_edge_unresolvable(burned).unwrap();
4931
4932        let reset = db.reset_all_unresolvable().unwrap();
4933        assert_eq!(reset, 2);
4934        assert_eq!(resolution_state_of(&db, exhausted), 0);
4935        assert_eq!(resolution_state_of(&db, burned), 0);
4936    }
4937
4938    #[test]
4939    fn test_reset_unresolvable_for_names_reopens_state_four() {
4940        // A heuristic-exhausted edge reopens when a matching symbol is added.
4941        let db = Database::open_memory().unwrap();
4942        let foo = insert_test_edge(&db, "foo");
4943        let bar = insert_test_edge(&db, "bar");
4944        db.mark_heuristic_exhausted_in_tx().unwrap();
4945
4946        let reopened = db
4947            .reset_unresolvable_for_names(&["foo".to_string()])
4948            .unwrap();
4949        assert_eq!(reopened, 1);
4950        assert_eq!(resolution_state_of(&db, foo), 0);
4951        assert_eq!(resolution_state_of(&db, bar), 4);
4952    }
4953
4954    #[test]
4955    fn test_stats_surfaces_external_and_unresolvable_counts() {
4956        let db = Database::open_memory().unwrap();
4957        let resolved = insert_test_edge(&db, "resolved_target");
4958        db.update_edge_target(resolved, "some:id").unwrap();
4959        let burned = insert_test_edge(&db, "burned");
4960        db.mark_edge_unresolvable(burned).unwrap();
4961        let external = insert_test_edge(&db, "external");
4962        db.mark_edge_external(external).unwrap();
4963        let _open = insert_test_edge(&db, "open");
4964
4965        let stats = db.stats().unwrap();
4966        assert_eq!(stats.num_resolved, 1);
4967        assert_eq!(stats.num_unresolvable, 1);
4968        assert_eq!(stats.num_external, 1);
4969        assert_eq!(stats.num_edges, 4);
4970    }
4971
4972    #[test]
4973    fn test_invalidate_edges_targeting_resets_state_when_target_disappears() {
4974        // When a symbol referenced by a resolved edge is removed, the edge
4975        // must drop back to (target_id NULL, state=0) so it re-enters the
4976        // unresolved set on the next pass.
4977        let db = Database::open_memory().unwrap();
4978
4979        // Set up: source edge points to symbol "ghost" via update_edge_target,
4980        // then drop the symbol so the edge becomes dangling.
4981        let src = test_symbol("src", SymbolKind::Function, "a.py", 1);
4982        let target = test_symbol("ghost", SymbolKind::Function, "b.py", 1);
4983        db.insert_symbols(&[src.clone(), target.clone()]).unwrap();
4984        let edge = Edge::new(&src.id, "ghost", EdgeKind::Calls, "a.py", 1);
4985        db.insert_edge(&edge).unwrap();
4986        let eid = db.conn.last_insert_rowid();
4987        db.update_edge_target(eid, &target.id).unwrap();
4988        assert_eq!(resolution_state_of(&db, eid), 1);
4989
4990        // Remove the target symbol — leaves edge.target_id pointing at nothing.
4991        db.conn
4992            .execute("DELETE FROM symbols WHERE id = ?1", params![target.id])
4993            .unwrap();
4994
4995        let mut dirty = std::collections::HashSet::new();
4996        dirty.insert("b.py".to_string());
4997        db.invalidate_edges_targeting(&dirty).unwrap();
4998
4999        assert_eq!(
5000            resolution_state_of(&db, eid),
5001            0,
5002            "dangling edge must return to state=0 so unresolved_edges() can see it"
5003        );
5004        let row: Option<String> = db
5005            .conn
5006            .query_row(
5007                "SELECT target_id FROM edges WHERE id = ?1",
5008                params![eid],
5009                |r| r.get(0),
5010            )
5011            .unwrap();
5012        assert!(row.is_none(), "target_id must be NULL after invalidation");
5013    }
5014
5015    #[test]
5016    fn test_delete_symbol_resets_state_on_dangling_incoming_edges() {
5017        // Regression for the "(target_id=NULL, state=1) zombie" bug: when a
5018        // resolved target symbol is deleted, every edge pointing to it must
5019        // drop back to state=0 — otherwise the edge becomes invisible to both
5020        // unresolved_edges() (state=1 filter) and graph traversal (NULL target).
5021        let db = Database::open_memory().unwrap();
5022        let src = test_symbol("caller", SymbolKind::Function, "a.py", 1);
5023        let target = test_symbol("ghost", SymbolKind::Function, "b.py", 1);
5024        db.insert_symbols(&[src.clone(), target.clone()]).unwrap();
5025        let edge = Edge::new(&src.id, "ghost", EdgeKind::Calls, "a.py", 1);
5026        db.insert_edge(&edge).unwrap();
5027        let eid = db.conn.last_insert_rowid();
5028        db.update_edge_target(eid, &target.id).unwrap();
5029
5030        db.delete_symbol(&target.id).unwrap();
5031
5032        assert_eq!(resolution_state_of(&db, eid), 0);
5033        assert_eq!(resolution_source_of(&db, eid), None, "stale tag must clear");
5034        let visible = db
5035            .unresolved_edges()
5036            .unwrap()
5037            .iter()
5038            .any(|e| e.edge_id == eid);
5039        assert!(
5040            visible,
5041            "orphaned edge must resurface in unresolved_edges()"
5042        );
5043    }
5044
5045    #[test]
5046    fn test_delete_symbols_in_tx_resets_state_on_dangling_incoming_edges() {
5047        // Same invariant as test_delete_symbol_..., for the batched path used
5048        // by the indexer's Merkle-diff `removed` set.
5049        let db = Database::open_memory().unwrap();
5050        let src = test_symbol("caller", SymbolKind::Function, "a.py", 1);
5051        let t1 = test_symbol("ghost1", SymbolKind::Function, "b.py", 1);
5052        let t2 = test_symbol("ghost2", SymbolKind::Function, "c.py", 1);
5053        db.insert_symbols(&[src.clone(), t1.clone(), t2.clone()])
5054            .unwrap();
5055        let e1 = Edge::new(&src.id, "ghost1", EdgeKind::Calls, "a.py", 1);
5056        db.insert_edge(&e1).unwrap();
5057        let eid1 = db.conn.last_insert_rowid();
5058        db.update_edge_target(eid1, &t1.id).unwrap();
5059        let e2 = Edge::new(&src.id, "ghost2", EdgeKind::Calls, "a.py", 2);
5060        db.insert_edge(&e2).unwrap();
5061        let eid2 = db.conn.last_insert_rowid();
5062        db.update_edge_target(eid2, &t2.id).unwrap();
5063
5064        assert_eq!(resolution_source_of(&db, eid1).as_deref(), Some("lsp"));
5065
5066        db.delete_symbols(&[t1.id.clone(), t2.id.clone()]).unwrap();
5067
5068        assert_eq!(resolution_state_of(&db, eid1), 0);
5069        assert_eq!(resolution_state_of(&db, eid2), 0);
5070        // Deleting the target unresolves the edge; its provenance must clear too,
5071        // else refs/callees report a stale tier for an edge pointing nowhere.
5072        assert_eq!(resolution_source_of(&db, eid1), None);
5073        assert_eq!(resolution_source_of(&db, eid2), None);
5074    }
5075
5076    #[test]
5077    fn test_heuristic_resolve_flips_state_to_one() {
5078        // Regression: resolve_edge_batch's UPDATE must set state=1 alongside
5079        // target_id. Otherwise heuristically-resolved edges stay state=0 and
5080        // get re-queried by LSP on the next pass — pure waste.
5081        let db = Database::open_memory().unwrap();
5082        let src = test_symbol("caller", SymbolKind::Function, "a.py", 1);
5083        let target = test_symbol("foo", SymbolKind::Function, "a.py", 10);
5084        db.insert_symbols(&[src.clone(), target.clone()]).unwrap();
5085        let edge = Edge::new(&src.id, "foo", EdgeKind::Calls, "a.py", 2);
5086        db.insert_edge(&edge).unwrap();
5087        let eid = db.conn.last_insert_rowid();
5088        assert_eq!(resolution_state_of(&db, eid), 0);
5089
5090        db.resolve_edges().unwrap();
5091
5092        assert_eq!(
5093            resolution_state_of(&db, eid),
5094            1,
5095            "heuristic resolve must set state=1 so LSP doesn't re-attack the edge"
5096        );
5097        assert!(
5098            db.unresolved_edges()
5099                .unwrap()
5100                .iter()
5101                .all(|e| e.edge_id != eid),
5102            "resolved edge must drop out of unresolved_edges()"
5103        );
5104    }
5105
5106    #[test]
5107    fn test_partial_unresolved_index_exists() {
5108        // The partial index speeds up the unresolved_edges() query on large
5109        // repos. Verify it actually got created by inspecting sqlite_master.
5110        let db = Database::open_memory().unwrap();
5111        let n: i64 = db
5112            .conn
5113            .query_row(
5114                "SELECT COUNT(*) FROM sqlite_master
5115                 WHERE type='index' AND name='idx_edges_unresolved'",
5116                [],
5117                |row| row.get(0),
5118            )
5119            .unwrap();
5120        assert_eq!(n, 1);
5121    }
5122
5123    #[test]
5124    fn test_resolution_state_default_via_insert_edges_batch() {
5125        // The batched insert path is the production hot path. Make sure
5126        // it honors the DEFAULT 0 just like single-row inserts do.
5127        let db = Database::open_memory().unwrap();
5128        let src = test_symbol("src", SymbolKind::Function, "a.py", 1);
5129        db.insert_symbols(std::slice::from_ref(&src)).unwrap();
5130        let edges = vec![
5131            Edge::new(&src.id, "x", EdgeKind::Calls, "a.py", 1),
5132            Edge::new(&src.id, "y", EdgeKind::Calls, "a.py", 2),
5133        ];
5134        db.insert_edges(&edges).unwrap();
5135        let states: Vec<i64> = db
5136            .conn
5137            .prepare("SELECT resolution_state FROM edges ORDER BY id")
5138            .unwrap()
5139            .query_map([], |row| row.get(0))
5140            .unwrap()
5141            .collect::<std::result::Result<_, _>>()
5142            .unwrap();
5143        assert_eq!(states, vec![0, 0]);
5144    }
5145
5146    #[test]
5147    fn test_migration_v3_to_v4_backfills_resolved_to_state_one() {
5148        // Simulate a pre-v4 database: open with v3-equivalent schema (no
5149        // resolution_state column, schema_version=3), insert edges with
5150        // and without target_ids, then re-open to trigger the migration.
5151        let tmp = tempfile::tempdir().unwrap();
5152        let path = tmp.path().join("v3.sqlite");
5153
5154        {
5155            let conn = Connection::open(&path).unwrap();
5156            // Bootstrap a v3-shaped edges table by hand.
5157            conn.execute_batch(
5158                "CREATE TABLE symbols (
5159                    id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
5160                    start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
5161                    parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
5162                    docstring TEXT, in_degree INTEGER DEFAULT 0,
5163                    content_hash TEXT, subtree_hash TEXT);
5164                 CREATE TABLE edges (
5165                    id INTEGER PRIMARY KEY AUTOINCREMENT,
5166                    source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
5167                    kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER);
5168                 CREATE TABLE files (path TEXT PRIMARY KEY);
5169                 CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
5170                 INSERT INTO metadata (key, value) VALUES ('schema_version', '3');
5171                 INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
5172                 INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
5173                   VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1);
5174                 INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
5175                   VALUES ('s:1', 'missing', NULL, 'calls', 'a.py', 2);",
5176            )
5177            .unwrap();
5178        }
5179
5180        // Re-open through the production path so migrate() runs the full ladder.
5181        let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
5182
5183        // The v3→4 migration adds the resolution_state column (schema transform);
5184        // verify it is present and queryable. The v7 stable-ID-escaping migration
5185        // clears the seeded rows, so the per-row backfill is no longer observable
5186        // after a full chain — assert the durable column + cleared-index contract.
5187        let has_resolution_state = db
5188            .conn
5189            .prepare("SELECT resolution_state FROM edges LIMIT 0")
5190            .is_ok();
5191        assert!(has_resolution_state, "v3→4 added resolution_state column");
5192
5193        let edge_count: i64 = db
5194            .conn
5195            .query_row("SELECT COUNT(*) FROM edges", [], |r| r.get(0))
5196            .unwrap();
5197        assert_eq!(edge_count, 0, "v7 cleared the index for full rebuild");
5198
5199        let bumped: String = db
5200            .conn
5201            .query_row(
5202                "SELECT value FROM metadata WHERE key = 'schema_version'",
5203                [],
5204                |r| r.get(0),
5205            )
5206            .unwrap();
5207        assert_eq!(bumped, SCHEMA_VERSION.to_string());
5208    }
5209
5210    // ── Edge provenance ──
5211
5212    /// Resolve a single `name`-targeting `calls` edge and return the provenance
5213    /// the resolver tagged on it. The caller wires up the symbol graph so a
5214    /// specific tier wins.
5215    fn resolve_one_and_get_provenance(db: &Database, name: &str) -> Option<EdgeProvenance> {
5216        let resolved = db.resolve_edges().unwrap();
5217        assert_eq!(resolved, 1, "expected exactly one edge to resolve");
5218        let refs = db.refs(name, None).unwrap();
5219        refs.into_iter()
5220            .find(|(e, _)| e.target_id.is_some())
5221            .and_then(|(e, _)| e.provenance)
5222    }
5223
5224    #[test]
5225    fn resolve_tags_provenance_same_file() {
5226        let db = Database::open_memory().unwrap();
5227        let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
5228        let same_file = test_symbol("helper", SymbolKind::Function, "a.py", 20);
5229        let other_file = test_symbol("helper", SymbolKind::Function, "b.py", 1);
5230        db.insert_symbols(&[caller.clone(), same_file, other_file])
5231            .unwrap();
5232        db.insert_edge(&Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5))
5233            .unwrap();
5234
5235        assert_eq!(
5236            resolve_one_and_get_provenance(&db, "helper"),
5237            Some(EdgeProvenance::SameFile)
5238        );
5239    }
5240
5241    #[test]
5242    fn resolve_tags_provenance_same_dir() {
5243        let db = Database::open_memory().unwrap();
5244        let caller = test_symbol("process", SymbolKind::Function, "pkg/a.py", 1);
5245        let same_dir = test_symbol("helper", SymbolKind::Function, "pkg/b.py", 1);
5246        let far = test_symbol("helper", SymbolKind::Function, "other/c.py", 1);
5247        db.insert_symbols(&[caller.clone(), same_dir, far]).unwrap();
5248        db.insert_edge(&Edge::new(
5249            &caller.id,
5250            "helper",
5251            EdgeKind::Calls,
5252            "pkg/a.py",
5253            5,
5254        ))
5255        .unwrap();
5256
5257        assert_eq!(
5258            resolve_one_and_get_provenance(&db, "helper"),
5259            Some(EdgeProvenance::SameDir)
5260        );
5261    }
5262
5263    #[test]
5264    fn resolve_tags_provenance_unique_global() {
5265        let db = Database::open_memory().unwrap();
5266        let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
5267        let target = test_symbol("only_one", SymbolKind::Function, "far/away.py", 1);
5268        db.insert_symbols(&[caller.clone(), target]).unwrap();
5269        db.insert_edge(&Edge::new(
5270            &caller.id,
5271            "only_one",
5272            EdgeKind::Calls,
5273            "a.py",
5274            5,
5275        ))
5276        .unwrap();
5277
5278        assert_eq!(
5279            resolve_one_and_get_provenance(&db, "only_one"),
5280            Some(EdgeProvenance::UniqueGlobal)
5281        );
5282    }
5283
5284    #[test]
5285    fn resolve_tags_provenance_kind_disambig() {
5286        let db = Database::open_memory().unwrap();
5287        // Two global matches: a class beats the constructor method (tier 6).
5288        let caller = test_symbol("handleLogin", SymbolKind::Method, "auth/Service.java", 10);
5289        let logger_class = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
5290        let logger_ctor = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
5291        db.insert_symbols(&[caller.clone(), logger_class, logger_ctor])
5292            .unwrap();
5293        db.insert_edge(&Edge::new(
5294            &caller.id,
5295            "Logger",
5296            EdgeKind::References,
5297            "auth/Service.java",
5298            12,
5299        ))
5300        .unwrap();
5301
5302        db.resolve_edges().unwrap();
5303        let refs = db.refs("Logger", None).unwrap();
5304        let edge = refs
5305            .iter()
5306            .find(|(e, _)| e.kind == EdgeKind::References)
5307            .unwrap();
5308        assert_eq!(edge.0.provenance, Some(EdgeProvenance::KindDisambig));
5309    }
5310
5311    #[test]
5312    fn resolve_tags_provenance_parent_scope() {
5313        // Tier 4 only fires when same-file/import/same-dir miss and there are
5314        // multiple global matches, one sharing the caller's parent scope. Build
5315        // two `helper`s in different dirs from the caller's file, both children
5316        // of the same parent as the caller, so only parent-scope disambiguates.
5317        let db = Database::open_memory().unwrap();
5318        let mut caller = test_symbol("run", SymbolKind::Method, "app/svc.py", 10);
5319        caller.parent_id = Some("app/svc.py:class:Svc".to_string());
5320        let mut same_scope = test_symbol("helper", SymbolKind::Method, "lib/a.py", 1);
5321        same_scope.parent_id = Some("app/svc.py:class:Svc".to_string());
5322        let mut other_scope = test_symbol("helper", SymbolKind::Method, "lib/b.py", 1);
5323        other_scope.parent_id = Some("other/x.py:class:Other".to_string());
5324        db.insert_symbols(&[caller.clone(), same_scope.clone(), other_scope])
5325            .unwrap();
5326        db.insert_edge(&Edge::new(
5327            &caller.id,
5328            "helper",
5329            EdgeKind::Calls,
5330            "app/svc.py",
5331            12,
5332        ))
5333        .unwrap();
5334
5335        assert_eq!(
5336            resolve_one_and_get_provenance(&db, "helper"),
5337            Some(EdgeProvenance::ParentScope)
5338        );
5339    }
5340
5341    #[test]
5342    fn callees_surfaces_provenance() {
5343        // Read-back coverage for the callees() path (uses the shared row_to_edge).
5344        let db = Database::open_memory().unwrap();
5345        let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
5346        let same_file = test_symbol("helper", SymbolKind::Function, "a.py", 20);
5347        db.insert_symbols(&[caller.clone(), same_file]).unwrap();
5348        db.insert_edge(&Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5))
5349            .unwrap();
5350        db.resolve_edges().unwrap();
5351
5352        let callees = db.callees("process").unwrap();
5353        assert_eq!(callees.len(), 1);
5354        assert_eq!(callees[0].provenance, Some(EdgeProvenance::SameFile));
5355    }
5356
5357    #[test]
5358    fn impact_surfaces_provenance() {
5359        // Read-back coverage for the impact() CTE mapper (depth at index 7,
5360        // provenance at index 6).
5361        let db = Database::open_memory().unwrap();
5362        let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
5363        let target = test_symbol("helper", SymbolKind::Function, "a.py", 20);
5364        db.insert_symbols(&[caller.clone(), target]).unwrap();
5365        db.insert_edge(&Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5))
5366            .unwrap();
5367        db.resolve_edges().unwrap();
5368
5369        let impact = db.impact("helper", 3).unwrap();
5370        let call = impact
5371            .iter()
5372            .find(|(e, _)| e.kind == EdgeKind::Calls)
5373            .unwrap();
5374        assert_eq!(call.0.provenance, Some(EdgeProvenance::SameFile));
5375    }
5376
5377    #[test]
5378    fn reset_unresolvable_for_names_clears_provenance() {
5379        // The per-reindex reopen path (indexer calls this on every incremental
5380        // run) must clear the stale LSP tag, not just the state.
5381        let db = Database::open_memory().unwrap();
5382        let id = insert_test_edge(&db, "foo");
5383        db.mark_edge_unresolvable(id).unwrap();
5384        assert_eq!(
5385            resolution_source_of(&db, id).as_deref(),
5386            Some("lsp_unresolvable")
5387        );
5388
5389        let reopened = db
5390            .reset_unresolvable_for_names(&["foo".to_string()])
5391            .unwrap();
5392        assert_eq!(reopened, 1);
5393        assert_eq!(resolution_source_of(&db, id), None, "stale tag cleared");
5394    }
5395
5396    #[test]
5397    fn insert_edge_round_trips_provenance() {
5398        // A reconstructed (already-resolved) edge persists its provenance through
5399        // insert and reads back identically.
5400        let db = Database::open_memory().unwrap();
5401        let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
5402        let target = test_symbol("helper", SymbolKind::Function, "a.py", 20);
5403        db.insert_symbols(&[caller.clone(), target.clone()])
5404            .unwrap();
5405        let mut edge = Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5);
5406        edge.target_id = Some(target.id.clone());
5407        edge.provenance = Some(EdgeProvenance::Lsp);
5408        db.insert_edge(&edge).unwrap();
5409        let eid = db.conn.last_insert_rowid();
5410
5411        let callees = db.callees("process").unwrap();
5412        assert_eq!(callees[0].provenance, Some(EdgeProvenance::Lsp));
5413        // An inserted edge that already has a target must persist resolution_state=1,
5414        // not the column default 0 — else stats()/unresolved_edges() misreport it.
5415        assert_eq!(resolution_state_of(&db, eid), 1);
5416        assert!(
5417            !db.unresolved_edges()
5418                .unwrap()
5419                .iter()
5420                .any(|e| e.edge_id == eid),
5421            "a resolved insert must not resurface as unresolved"
5422        );
5423    }
5424
5425    #[test]
5426    fn insert_edge_without_target_is_unresolved() {
5427        // The extraction path inserts edges with no target_id; they must land at
5428        // resolution_state=0 so resolve_edges()/LSP pick them up.
5429        let db = Database::open_memory().unwrap();
5430        let src = test_symbol("src", SymbolKind::Function, "a.py", 1);
5431        db.insert_symbols(std::slice::from_ref(&src)).unwrap();
5432        db.insert_edge(&Edge::new(&src.id, "missing", EdgeKind::Calls, "a.py", 1))
5433            .unwrap();
5434        let eid = db.conn.last_insert_rowid();
5435        assert_eq!(resolution_state_of(&db, eid), 0);
5436    }
5437
5438    #[test]
5439    fn resolve_tags_provenance_import_path() {
5440        // Two-pass: the import edge resolves in pass 1 (tier 6, class over ctor),
5441        // then the reference in the importing file resolves via import-path in
5442        // pass 2. Mirrors test_resolve_edges_multipass_import_then_call.
5443        let db = Database::open_memory().unwrap();
5444        let import_sym = test_symbol("util.Logger", SymbolKind::Import, "auth/service.java", 1);
5445        let caller = test_symbol("authenticate", SymbolKind::Method, "auth/service.java", 10);
5446        let logger_class = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
5447        let logger_ctor = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
5448        db.insert_symbols(&[
5449            import_sym.clone(),
5450            caller.clone(),
5451            logger_class,
5452            logger_ctor,
5453        ])
5454        .unwrap();
5455        db.insert_edge(&Edge::new(
5456            &import_sym.id,
5457            "Logger",
5458            EdgeKind::Imports,
5459            "auth/service.java",
5460            1,
5461        ))
5462        .unwrap();
5463        db.insert_edge(&Edge::new(
5464            &caller.id,
5465            "Logger",
5466            EdgeKind::References,
5467            "auth/service.java",
5468            15,
5469        ))
5470        .unwrap();
5471
5472        assert_eq!(db.resolve_edges().unwrap(), 2);
5473        let refs = db.refs("Logger", None).unwrap();
5474        let reference = refs
5475            .iter()
5476            .find(|(e, _)| e.kind == EdgeKind::References)
5477            .unwrap();
5478        assert_eq!(reference.0.provenance, Some(EdgeProvenance::ImportPath));
5479    }
5480
5481    #[test]
5482    fn lsp_resolve_tags_provenance_lsp() {
5483        let db = Database::open_memory().unwrap();
5484        let id = insert_test_edge(&db, "anything");
5485        db.update_edge_target(id, "some:symbol:id").unwrap();
5486        assert_eq!(resolution_source_of(&db, id).as_deref(), Some("lsp"));
5487    }
5488
5489    #[test]
5490    fn lsp_overwrite_retags_heuristic_as_lsp() {
5491        let db = Database::open_memory().unwrap();
5492        let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
5493        let same_file = test_symbol("helper", SymbolKind::Function, "a.py", 20);
5494        db.insert_symbols(&[caller.clone(), same_file.clone()])
5495            .unwrap();
5496        db.insert_edge(&Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5))
5497            .unwrap();
5498        db.resolve_edges().unwrap();
5499
5500        let edge_id: i64 = db
5501            .conn
5502            .query_row("SELECT id FROM edges LIMIT 1", [], |r| r.get(0))
5503            .unwrap();
5504        assert_eq!(
5505            resolution_source_of(&db, edge_id).as_deref(),
5506            Some("same_file")
5507        );
5508
5509        db.update_edge_target(edge_id, &same_file.id).unwrap();
5510        assert_eq!(resolution_source_of(&db, edge_id).as_deref(), Some("lsp"));
5511    }
5512
5513    #[test]
5514    fn mark_external_tags_lsp_external() {
5515        let db = Database::open_memory().unwrap();
5516        let id = insert_test_edge(&db, "anything");
5517        db.mark_edge_external(id).unwrap();
5518        assert_eq!(
5519            resolution_source_of(&db, id).as_deref(),
5520            Some("lsp_external")
5521        );
5522    }
5523
5524    #[test]
5525    fn mark_unresolvable_tags_lsp_unresolvable() {
5526        let db = Database::open_memory().unwrap();
5527        let id = insert_test_edge(&db, "anything");
5528        db.mark_edge_unresolvable(id).unwrap();
5529        assert_eq!(
5530            resolution_source_of(&db, id).as_deref(),
5531            Some("lsp_unresolvable")
5532        );
5533    }
5534
5535    #[test]
5536    fn reset_unresolvable_clears_provenance() {
5537        let db = Database::open_memory().unwrap();
5538        let id = insert_test_edge(&db, "foo");
5539        db.mark_edge_external(id).unwrap();
5540        assert_eq!(
5541            resolution_source_of(&db, id).as_deref(),
5542            Some("lsp_external")
5543        );
5544
5545        db.reset_all_unresolvable().unwrap();
5546        assert_eq!(resolution_source_of(&db, id), None, "stale tag cleared");
5547    }
5548
5549    /// Bootstrap a pre-v6-shaped DB at `path`: edges have `resolution_state` but
5550    /// no `resolution_source` column, stamped at `schema_version`. Shared by the
5551    /// migration tests so both exercise the same "old" shape.
5552    fn bootstrap_pre_v6_db(path: &std::path::Path, schema_version: u32, seed_edges: bool) {
5553        let conn = Connection::open(path).unwrap();
5554        conn.execute_batch(
5555            "CREATE TABLE symbols (
5556                id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
5557                start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
5558                parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
5559                docstring TEXT, in_degree INTEGER DEFAULT 0,
5560                content_hash TEXT, subtree_hash TEXT);
5561             CREATE TABLE edges (
5562                id INTEGER PRIMARY KEY AUTOINCREMENT,
5563                source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
5564                kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER,
5565                resolution_state INTEGER NOT NULL DEFAULT 0);
5566             CREATE TABLE files (path TEXT PRIMARY KEY);
5567             CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
5568             CREATE TABLE query_log (id INTEGER PRIMARY KEY AUTOINCREMENT,
5569                tool TEXT NOT NULL, source TEXT NOT NULL, ts INTEGER NOT NULL);",
5570        )
5571        .unwrap();
5572        conn.execute(
5573            "INSERT INTO metadata (key, value) VALUES ('schema_version', ?1)",
5574            params![schema_version.to_string()],
5575        )
5576        .unwrap();
5577        if seed_edges {
5578            conn.execute_batch(
5579                "INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
5580                 INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line, resolution_state)
5581                   VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1, 1);
5582                 INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line, resolution_state)
5583                   VALUES ('s:1', 'missing', NULL, 'calls', 'a.py', 2, 0);",
5584            )
5585            .unwrap();
5586        }
5587    }
5588
5589    #[test]
5590    fn migration_v5_to_v6_adds_resolution_source_column() {
5591        // A pre-v6 DB (resolution_state present, resolution_source absent) gains
5592        // the resolution_source column on open. The v7 stable-ID-escaping
5593        // migration then clears the seeded rows, so assert the durable column +
5594        // cleared-index contract rather than the now-wiped per-row backfill.
5595        let tmp = tempfile::tempdir().unwrap();
5596        let path = tmp.path().join("v5.sqlite");
5597        bootstrap_pre_v6_db(&path, 5, true);
5598
5599        let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
5600
5601        let has_resolution_source = db
5602            .conn
5603            .prepare("SELECT resolution_source FROM edges LIMIT 0")
5604            .is_ok();
5605        assert!(has_resolution_source, "v5→6 added resolution_source column");
5606
5607        let edge_count: i64 = db
5608            .conn
5609            .query_row("SELECT COUNT(*) FROM edges", [], |r| r.get(0))
5610            .unwrap();
5611        assert_eq!(edge_count, 0, "v7 cleared the index for full rebuild");
5612
5613        let bumped: String = db
5614            .conn
5615            .query_row(
5616                "SELECT value FROM metadata WHERE key = 'schema_version'",
5617                [],
5618                |r| r.get(0),
5619            )
5620            .unwrap();
5621        assert_eq!(bumped, SCHEMA_VERSION.to_string());
5622    }
5623
5624    #[test]
5625    fn migration_v6_self_heals_missing_column() {
5626        // schema_version says 6 but the column is absent (partial-migration
5627        // crash). The probe guard must re-add it on open.
5628        let tmp = tempfile::tempdir().unwrap();
5629        let path = tmp.path().join("partial.sqlite");
5630        bootstrap_pre_v6_db(&path, 6, false);
5631
5632        let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
5633        let has_col = db
5634            .conn
5635            .prepare("SELECT resolution_source FROM edges LIMIT 0")
5636            .is_ok();
5637        assert!(has_col, "missing resolution_source column was re-added");
5638    }
5639
5640    /// Bootstrap a v6-shaped DB (all columns present, stamped at v6) with one
5641    /// seeded row in every table the v7 wipe clears, plus a `last_commit`, so the
5642    /// wipe is observable per table. `symbol_content` uses the real shape and the
5643    /// FTS5 vtable + insert/delete triggers so its row inserts (and the wipe's
5644    /// delete) keep the external-content index consistent.
5645    fn bootstrap_v6_db(path: &std::path::Path) {
5646        let conn = Connection::open(path).unwrap();
5647        conn.execute_batch(
5648            "CREATE TABLE symbols (
5649                id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
5650                start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
5651                parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
5652                docstring TEXT, in_degree INTEGER DEFAULT 0,
5653                content_hash TEXT, subtree_hash TEXT);
5654             CREATE TABLE edges (
5655                id INTEGER PRIMARY KEY AUTOINCREMENT,
5656                source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
5657                kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER,
5658                resolution_state INTEGER NOT NULL DEFAULT 0, resolution_source TEXT);
5659             CREATE TABLE files (path TEXT PRIMARY KEY);
5660             CREATE TABLE symbol_content (
5661                symbol_id TEXT PRIMARY KEY, content TEXT NOT NULL, header TEXT NOT NULL,
5662                normalized_name TEXT NOT NULL DEFAULT '');
5663             CREATE VIRTUAL TABLE symbol_fts USING fts5(
5664                symbol_name, normalized_name, content,
5665                content=symbol_content, content_rowid=rowid);
5666             CREATE TRIGGER symbol_content_ai AFTER INSERT ON symbol_content BEGIN
5667                INSERT INTO symbol_fts(rowid, symbol_name, normalized_name, content)
5668                VALUES (new.rowid, (SELECT name FROM symbols WHERE id = new.symbol_id),
5669                        new.normalized_name, new.content);
5670             END;
5671             CREATE TRIGGER symbol_content_ad AFTER DELETE ON symbol_content BEGIN
5672                INSERT INTO symbol_fts(symbol_fts, rowid, symbol_name, normalized_name, content)
5673                VALUES ('delete', old.rowid, (SELECT name FROM symbols WHERE id = old.symbol_id),
5674                        old.normalized_name, old.content);
5675             END;
5676             CREATE TABLE symbol_embedding_map (symbol_id TEXT NOT NULL);
5677             CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
5678             CREATE TABLE query_log (id INTEGER PRIMARY KEY AUTOINCREMENT,
5679                tool TEXT NOT NULL, source TEXT NOT NULL, ts INTEGER NOT NULL);
5680             INSERT INTO symbols (id, name, kind, file_path) VALUES ('a.py:import:os.path', 'os.path', 'import', 'a.py');
5681             INSERT INTO files (path) VALUES ('a.py');
5682             INSERT INTO edges (source_id, target_name, kind, file_path, line)
5683                VALUES ('a.py:import:os.path', 'os', 'imports', 'a.py', 1);
5684             INSERT INTO symbol_content (symbol_id, content, header)
5685                VALUES ('a.py:import:os.path', 'body', 'sig');
5686             INSERT INTO symbol_embedding_map (symbol_id) VALUES ('a.py:import:os.path');
5687             INSERT INTO metadata (key, value) VALUES ('schema_version', '6');
5688             INSERT INTO metadata (key, value) VALUES ('last_commit', 'deadbeef');",
5689        )
5690        .unwrap();
5691    }
5692
5693    #[test]
5694    fn migration_v6_to_v7_clears_index_for_full_rebuild() {
5695        // The v7 symbol-ID escaping changes the ID format, so old (collidable)
5696        // rows must be wiped and last_commit cleared so the next index rebuilds
5697        // every file from scratch.
5698        let tmp = tempfile::tempdir().unwrap();
5699        let path = tmp.path().join("v6.sqlite");
5700        bootstrap_v6_db(&path);
5701
5702        let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
5703
5704        let count = |table: &str| -> i64 {
5705            db.conn
5706                .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0))
5707                .unwrap()
5708        };
5709        assert_eq!(count("symbols"), 0, "symbols cleared");
5710        assert_eq!(count("edges"), 0, "edges cleared");
5711        assert_eq!(count("files"), 0, "files cleared");
5712        assert_eq!(count("symbol_content"), 0, "symbol_content cleared");
5713        assert_eq!(
5714            count("symbol_embedding_map"),
5715            0,
5716            "symbol_embedding_map cleared"
5717        );
5718
5719        let last_commit: Option<String> = db
5720            .conn
5721            .query_row(
5722                "SELECT value FROM metadata WHERE key = 'last_commit'",
5723                [],
5724                |r| r.get(0),
5725            )
5726            .optional()
5727            .unwrap();
5728        assert_eq!(
5729            last_commit, None,
5730            "last_commit cleared to force full reindex"
5731        );
5732
5733        let bumped: String = db
5734            .conn
5735            .query_row(
5736                "SELECT value FROM metadata WHERE key = 'schema_version'",
5737                [],
5738                |r| r.get(0),
5739            )
5740            .unwrap();
5741        assert_eq!(bumped, SCHEMA_VERSION.to_string());
5742
5743        // The v7 wipe is destructive, so the pre-migration DB must be backed up
5744        // first — same safety contract as the v2→3 wipe.
5745        let backups = std::fs::read_dir(tmp.path())
5746            .unwrap()
5747            .filter_map(|e| e.ok())
5748            .filter(|e| {
5749                e.file_name()
5750                    .to_string_lossy()
5751                    .starts_with("v6.sqlite.pre-v")
5752            })
5753            .count();
5754        assert_eq!(backups, 1, "v6→7 wipe must back up the index first");
5755    }
5756
5757    #[test]
5758    fn read_metadata_at_returns_value_when_present() {
5759        let dir = tempfile::TempDir::new().unwrap();
5760        let db_path = dir.path().join("test.db");
5761        {
5762            let db = Database::open(&db_path, 384).unwrap();
5763            db.set_metadata("last_commit", "abc1234").unwrap();
5764        }
5765        assert_eq!(
5766            read_metadata_at(&db_path, "last_commit").unwrap(),
5767            Some("abc1234".to_string())
5768        );
5769    }
5770
5771    #[test]
5772    fn read_metadata_at_returns_none_when_row_absent() {
5773        let dir = tempfile::TempDir::new().unwrap();
5774        let db_path = dir.path().join("test.db");
5775        // A freshly opened cartog DB has a metadata table but no last_commit row.
5776        let _db = Database::open(&db_path, 384).unwrap();
5777        assert_eq!(read_metadata_at(&db_path, "last_commit").unwrap(), None);
5778    }
5779
5780    #[test]
5781    fn read_metadata_at_returns_none_for_non_cartog_sqlite() {
5782        let dir = tempfile::TempDir::new().unwrap();
5783        let db_path = dir.path().join("foreign.db");
5784        // A SQLite file with no `metadata` table is not a cartog DB; the helper
5785        // treats the missing table as an absent value, not an error.
5786        let conn = Connection::open(&db_path).unwrap();
5787        conn.execute_batch("CREATE TABLE notes(content TEXT);")
5788            .unwrap();
5789        drop(conn);
5790        assert_eq!(read_metadata_at(&db_path, "last_commit").unwrap(), None);
5791    }
5792
5793    #[test]
5794    fn read_metadata_at_returns_none_for_null_value() {
5795        let dir = tempfile::TempDir::new().unwrap();
5796        let db_path = dir.path().join("test.db");
5797        {
5798            let db = Database::open(&db_path, 384).unwrap();
5799            // A corrupt/hand-edited NULL value must read as absent, not error.
5800            db.conn
5801                .execute(
5802                    "INSERT OR REPLACE INTO metadata (key, value) VALUES ('last_commit', NULL)",
5803                    [],
5804                )
5805                .unwrap();
5806        }
5807        assert_eq!(read_metadata_at(&db_path, "last_commit").unwrap(), None);
5808    }
5809}