//! normalize_facts/index.rs
1use crate::symbols::SymbolParser;
2use ignore::WalkBuilder;
3use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle};
4use libsql::{Connection, Database, params};
5pub use normalize_facts_core::IndexedFile;
6use normalize_facts_core::{FlatImport, FlatSymbol, TypeRef};
7use normalize_languages::support_for_path;
8use rayon::prelude::*;
9use std::path::{Path, PathBuf};
10use std::time::{SystemTime, UNIX_EPOCH};
11
/// A single CFG block row ready for DB insertion.
///
/// Mirrors the `cfg_blocks` columns except `file`, which is tracked separately.
struct CfgBlockRow {
    /// Qualified name of the function this block belongs to.
    function_qname: String,
    /// Start line of the containing function; together with the qname this
    /// identifies the function (see the UNIQUE constraint on `cfg_blocks`).
    function_start_line: u32,
    /// Block id, unique within one function's CFG.
    block_id: u32,
    /// Block kind, stored as text in the `kind` column.
    kind: String,
    /// Byte span of the block within the source file.
    byte_start: usize,
    byte_end: usize,
    /// Line span of the block within the source file.
    start_line: u32,
    end_line: u32,
}
23
/// A single CFG edge row ready for DB insertion.
///
/// Mirrors the `cfg_edges` columns except `file`, which is tracked separately.
struct CfgEdgeRow {
    /// Qualified name of the function this edge belongs to.
    function_qname: String,
    /// Start line of the containing function (disambiguates qname collisions).
    function_start_line: u32,
    /// Source block id within the function's CFG.
    from_block: u32,
    /// Destination block id within the function's CFG.
    to_block: u32,
    /// Edge kind, stored as text in the `kind` column.
    kind: String,
    /// Exception type for EdgeKind::Exception edges (None = conservative).
    exception_type: Option<String>,
}
34
/// A single CFG def row ready for DB insertion.
///
/// Mirrors the `cfg_defs` columns except `file`, which is tracked separately.
struct CfgDefRow {
    /// Qualified name of the function this def belongs to.
    function_qname: String,
    /// Start line of the containing function (disambiguates qname collisions).
    function_start_line: u32,
    /// CFG block in which the def occurs.
    block_id: u32,
    /// Name being defined.
    name: String,
    /// Byte offset of the def within the source file.
    byte_offset: usize,
    /// Source line of the def.
    line: u32,
}
44
/// A single CFG use row ready for DB insertion.
///
/// Mirrors the `cfg_uses` columns except `file`, which is tracked separately.
struct CfgUseRow {
    /// Qualified name of the function this use belongs to.
    function_qname: String,
    /// Start line of the containing function (disambiguates qname collisions).
    function_start_line: u32,
    /// CFG block in which the use occurs.
    block_id: u32,
    /// Name being used.
    name: String,
    /// Byte offset of the use within the source file.
    byte_offset: usize,
    /// Source line of the use.
    line: u32,
}
54
/// A single CFG effect row ready for DB insertion.
///
/// Mirrors the `cfg_effects` columns except `file`, which is tracked separately.
struct CfgEffectRow {
    /// Qualified name of the function this effect belongs to.
    function_qname: String,
    /// Start line of the containing function (disambiguates qname collisions).
    function_start_line: u32,
    /// CFG block in which the effect occurs.
    block_id: u32,
    /// Effect kind, stored as text in the `kind` column.
    kind: String,
    /// Byte offset of the effect within the source file.
    byte_offset: usize,
    /// Source line of the effect.
    line: u32,
    /// Optional label, stored in the nullable `label` column.
    label: Option<String>,
}
65
/// CFG rows for a single file, ready for DB insertion.
///
/// Each field parallels one of the `cfg_*` tables created in `try_open`.
struct CfgData {
    blocks: Vec<CfgBlockRow>,
    edges: Vec<CfgEdgeRow>,
    defs: Vec<CfgDefRow>,
    uses: Vec<CfgUseRow>,
    effects: Vec<CfgEffectRow>,
}
74
/// A parsed symbol ready for database insertion.
///
/// Serde derives allow this to be embedded in the CA-cache payload
/// (`CachedFileData`).
#[derive(serde::Serialize, serde::Deserialize)]
struct ParsedSymbol {
    /// Symbol name.
    name: String,
    /// Symbol kind, stored as text in the `symbols.kind` column.
    kind: String,
    /// First line of the definition.
    start_line: usize,
    /// Last line of the definition.
    end_line: usize,
    /// Enclosing symbol, if any.
    parent: Option<String>,
    /// Visibility; the `symbols` table defaults this column to 'public'.
    visibility: String,
    /// Attributes; stored one row per attribute in `symbol_attributes`.
    attributes: Vec<String>,
    /// Whether this symbol is an interface/trait implementation
    /// (persisted in the `symbols.is_impl` column).
    is_interface_impl: bool,
    /// Implemented interfaces/traits; one row each in `symbol_implements`.
    implements: Vec<String>,
    /// Docstring attached to the symbol, if one was extracted.
    docstring: Option<String>,
}
89
/// One call-site entry: (caller_symbol, callee_name, callee_qualifier, access, line).
///
/// NOTE(review): the caller file is not part of the tuple — presumably it comes
/// from the surrounding `ParsedFileData.file_path` at insertion time; confirm
/// against the insertion code.
type CallEntry = (String, String, Option<String>, Option<String>, usize);
92
/// Parsed data for a single file, ready for database insertion.
struct ParsedFileData {
    /// Path of the parsed file (relative or absolute depending on the caller —
    /// TODO confirm which; the `files` table stores root-relative paths).
    file_path: String,
    /// Symbols defined in the file.
    symbols: Vec<ParsedSymbol>,
    /// Call sites found in the file.
    calls: Vec<CallEntry>,
    /// imports (for Python files only)
    imports: Vec<FlatImport>,
    /// (type_name, method_name) for interface/class method signatures
    type_methods: Vec<(String, String)>,
    /// Type-to-type references (field types, param types, extends, etc.)
    type_refs: Vec<TypeRef>,
    /// CFG data (blocks, edges, defs, uses) for function-level analysis.
    cfg: CfgData,
}
107
/// CA-cache payload: all extracted data for a single file, keyed by content hash.
/// Does not include `file_path` — that is the lookup key, not part of the payload.
///
/// Note: unlike `ParsedFileData`, there is no `cfg` field here — CFG rows are
/// not part of the cached payload.
#[derive(serde::Serialize, serde::Deserialize)]
struct CachedFileData {
    symbols: Vec<ParsedSymbol>,
    calls: Vec<CallEntry>,
    imports: Vec<FlatImport>,
    type_methods: Vec<(String, String)>,
    type_refs: Vec<TypeRef>,
}
118
// Not yet public - just delete .normalize/index.sqlite on schema changes
/// Current index schema version. `try_open` compares this against
/// `meta.schema_version`; on mismatch it clears all derived tables and
/// records the new value.
const SCHEMA_VERSION: i64 = 15;

/// Bump when extraction logic changes to invalidate cached results.
/// Bumped to "2" (2026-04-27): purge CA cache entries that may have been poisoned
/// by the old bug where rebuilds without grammars loaded cached empty results.
/// Passed to `CaCache::gc_stale_versions` when the cache is opened.
const EXTRACTOR_VERSION: &str = "2";
126
127/// Check if a file path has a supported source extension.
128fn is_source_file(path: &str) -> bool {
129    normalize_languages::support_for_path(std::path::Path::new(path)).is_some()
130}
131
132/// Generate SQL WHERE clause for filtering source files.
133/// Returns: "path LIKE '%.py' OR path LIKE '%.rs' OR ..."
134fn source_extensions_sql_filter() -> String {
135    let mut extensions: Vec<&str> = normalize_languages::supported_languages()
136        .iter()
137        .flat_map(|lang| lang.extensions().iter().copied())
138        .collect();
139    extensions.sort_unstable();
140    extensions.dedup();
141    extensions
142        .iter()
143        .map(|ext| format!("path LIKE '%.{}'", ext))
144        .collect::<Vec<_>>()
145        .join(" OR ")
146}
147
/// Result from symbol search.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SymbolMatch {
    /// Symbol name.
    pub name: String,
    /// Symbol kind (as stored in the `symbols.kind` column).
    pub kind: String,
    /// File the symbol was found in.
    pub file: String,
    /// First line of the definition.
    pub start_line: usize,
    /// Last line of the definition.
    pub end_line: usize,
    /// Enclosing symbol, if any.
    pub parent: Option<String>,
}
158
/// Files that changed since last index.
///
/// Populated by `get_changed_files` from an mtime comparison between the
/// filesystem walk and the `files` table.
#[derive(Debug, Default)]
pub struct ChangedFiles {
    /// On disk but not in the index.
    pub added: Vec<String>,
    /// In both, with a newer on-disk mtime.
    pub modified: Vec<String>,
    /// In the index but no longer on disk.
    pub deleted: Vec<String>,
}
166
/// Call graph statistics.
#[derive(Debug, Clone, Copy, Default)]
pub struct CallGraphStats {
    /// Count of indexed symbols.
    pub symbols: usize,
    /// Count of indexed call sites.
    pub calls: usize,
    /// Count of indexed imports.
    pub imports: usize,
}
174
/// Handle to the facts index: a local SQLite database plus the project root
/// it was built from.
pub struct FileIndex {
    conn: Connection,
    // NOTE(review): held but never read — presumably kept so the Database
    // outlives `conn`; confirm against libsql's ownership requirements.
    #[allow(dead_code)]
    db: Database,
    /// Project root directory (used for file walking during refresh).
    root: PathBuf,
    /// When true, long-running operations may show progress bars
    /// (toggled via `set_progress`).
    progress: bool,
    /// Content-addressed extraction cache (optional; best-effort).
    ca_cache: Option<crate::ca_cache::CaCache>,
}
184
185impl FileIndex {
186    /// Open or create an index at the specified database path.
187    /// On corruption, automatically deletes and recreates the index.
188    ///
189    /// # Arguments
190    /// * `db_path` - Path to the SQLite database file
191    /// * `root` - Project root directory (used for file walking during refresh)
192    pub async fn open(db_path: &Path, root: &Path) -> Result<Self, libsql::Error> {
193        // Ensure parent directory exists
194        if let Some(parent) = db_path.parent()
195            && let Err(e) = std::fs::create_dir_all(parent)
196        {
197            tracing::warn!(
198                "normalize-facts: failed to create index directory {:?}: {}",
199                parent,
200                e
201            );
202        }
203
204        // Try to open, with recovery on corruption
205        match Self::try_open(db_path, root).await {
206            Ok(idx) => Ok(idx),
207            Err(e) => {
208                // Check for corruption-like errors
209                let err_str = e.to_string().to_lowercase();
210                let is_corruption = err_str.contains("corrupt")
211                    || err_str.contains("malformed")
212                    || err_str.contains("disk i/o error")
213                    || err_str.contains("not a database")
214                    || err_str.contains("database disk image")
215                    || err_str.contains("integrity check failed");
216
217                if is_corruption {
218                    tracing::warn!("Index corrupted, rebuilding: {}", e);
219                    // Delete corrupted database and retry
220                    let _ = std::fs::remove_file(db_path);
221                    // Also remove journal/wal files if they exist
222                    let _ = std::fs::remove_file(db_path.with_extension("sqlite-journal"));
223                    let _ = std::fs::remove_file(db_path.with_extension("sqlite-wal"));
224                    let _ = std::fs::remove_file(db_path.with_extension("sqlite-shm"));
225                    Self::try_open(db_path, root).await
226                } else {
227                    Err(e)
228                }
229            }
230        }
231    }
232
    /// Internal: try to open database without recovery.
    ///
    /// Steps, in order:
    /// 1. Open/create the SQLite file and run `PRAGMA quick_check`; corruption
    ///    is surfaced as `SqliteFailure(11, ..)` so `open()` can wipe and retry.
    /// 2. Create all tables and indexes idempotently.
    /// 3. Apply column migrations for older schemas (duplicate-column errors
    ///    are ignored via `.ok()`).
    /// 4. On a `SCHEMA_VERSION` mismatch, clear derived tables so the next
    ///    rebuild repopulates them, then record the new version in `meta`.
    /// 5. Create convenience views for agent queries.
    /// 6. Open the content-addressed extraction cache (best-effort; failure
    ///    is non-fatal).
    async fn try_open(db_path: &Path, root: &Path) -> Result<Self, libsql::Error> {
        let db = libsql::Builder::new_local(db_path).build().await?;
        let conn = db.connect()?;

        // Quick integrity check - this will catch most corruption
        // PRAGMA quick_check is faster than full integrity_check
        let mut rows = conn.query("PRAGMA quick_check(1)", ()).await?;
        let integrity: String = if let Some(row) = rows.next().await? {
            row.get(0).unwrap_or_else(|_| "error".to_string())
        } else {
            // No row at all from quick_check — treat as corruption.
            "error".to_string()
        };
        if integrity != "ok" {
            return Err(libsql::Error::SqliteFailure(
                11, // SQLITE_CORRUPT
                format!("Database integrity check failed: {}", integrity),
            ));
        }

        // Initialize schema
        // meta: key/value store (schema_version, last_indexed, co_change_last_commit, ...)
        conn.execute(
            "CREATE TABLE IF NOT EXISTS meta (
                key TEXT PRIMARY KEY,
                value TEXT
            )",
            (),
        )
        .await?;
        // files: one row per walked path; mtime drives change detection.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS files (
                path TEXT PRIMARY KEY,
                is_dir INTEGER NOT NULL,
                mtime INTEGER NOT NULL,
                lines INTEGER NOT NULL DEFAULT 0
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_files_name ON files(path)",
            (),
        )
        .await?;

        // Call graph for fast caller/callee lookups
        conn.execute(
            "CREATE TABLE IF NOT EXISTS calls (
                caller_file TEXT NOT NULL,
                caller_symbol TEXT NOT NULL,
                callee_name TEXT NOT NULL,
                callee_qualifier TEXT,
                callee_resolved_file TEXT,
                line INTEGER NOT NULL,
                access TEXT
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_calls_callee ON calls(callee_name)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_calls_caller ON calls(caller_file, caller_symbol)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_calls_qualifier ON calls(callee_qualifier)",
            (),
        )
        .await?;
        // May fail on old DBs where the column doesn't exist yet; migration below adds it.
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_calls_resolved ON calls(callee_resolved_file)",
            (),
        )
        .await
        .ok();

        // Symbol definitions
        conn.execute(
            "CREATE TABLE IF NOT EXISTS symbols (
                file TEXT NOT NULL,
                name TEXT NOT NULL,
                kind TEXT NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                parent TEXT,
                visibility TEXT NOT NULL DEFAULT 'public',
                is_impl INTEGER NOT NULL DEFAULT 0
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file)",
            (),
        )
        .await?;

        // Symbol attributes (one row per attribute per symbol)
        conn.execute(
            "CREATE TABLE IF NOT EXISTS symbol_attributes (
                file TEXT NOT NULL,
                name TEXT NOT NULL,
                attribute TEXT NOT NULL
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbol_attributes_file_name ON symbol_attributes(file, name)",
            (),
        )
        .await?;

        // Symbol implements (one row per interface/trait per symbol)
        conn.execute(
            "CREATE TABLE IF NOT EXISTS symbol_implements (
                file TEXT NOT NULL,
                name TEXT NOT NULL,
                interface TEXT NOT NULL
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbol_implements_file_name ON symbol_implements(file, name)",
            (),
        )
        .await?;

        // Import tracking
        conn.execute(
            "CREATE TABLE IF NOT EXISTS imports (
                file TEXT NOT NULL,
                module TEXT,
                name TEXT NOT NULL,
                alias TEXT,
                line INTEGER NOT NULL,
                resolved_file TEXT,
                is_reexport INTEGER NOT NULL DEFAULT 0
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_imports_file ON imports(file)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_imports_name ON imports(name)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_imports_module ON imports(module)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_imports_resolved ON imports(resolved_file)",
            (),
        )
        .await?;

        // Type method signatures
        conn.execute(
            "CREATE TABLE IF NOT EXISTS type_methods (
                file TEXT NOT NULL,
                type_name TEXT NOT NULL,
                method_name TEXT NOT NULL,
                PRIMARY KEY (file, type_name, method_name)
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_type_methods_type ON type_methods(type_name)",
            (),
        )
        .await?;

        // Type references (type-to-type dependencies)
        conn.execute(
            "CREATE TABLE IF NOT EXISTS type_refs (
                file TEXT NOT NULL,
                source_symbol TEXT NOT NULL,
                target_type TEXT NOT NULL,
                kind TEXT NOT NULL,
                line INTEGER NOT NULL
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_type_refs_file ON type_refs(file)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_type_refs_source ON type_refs(source_symbol)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_type_refs_target ON type_refs(target_type)",
            (),
        )
        .await?;

        // Migrate existing tables: add columns that may be missing from older schemas.
        // SQLite errors on duplicate ADD COLUMN, so we ignore failures.
        conn.execute(
            "ALTER TABLE symbols ADD COLUMN visibility TEXT NOT NULL DEFAULT 'public'",
            (),
        )
        .await
        .ok();
        conn.execute(
            "ALTER TABLE symbols ADD COLUMN is_impl INTEGER NOT NULL DEFAULT 0",
            (),
        )
        .await
        .ok();
        // resolved_file was added to imports after schema version 5 was already set;
        // run unconditionally so existing v5 DBs without the column get migrated.
        conn.execute("ALTER TABLE imports ADD COLUMN resolved_file TEXT", ())
            .await
            .ok();

        // Check schema version
        let mut rows = conn
            .query(
                "SELECT CAST(value AS INTEGER) FROM meta WHERE key = 'schema_version'",
                (),
            )
            .await?;
        let version: i64 = if let Some(row) = rows.next().await? {
            row.get(0).unwrap_or(0)
        } else {
            // Fresh DB with no recorded version — forces the reset path below.
            0
        };

        if version != SCHEMA_VERSION {
            // Reset on schema change
            conn.execute("DELETE FROM files", ()).await?;
            conn.execute("DELETE FROM calls", ()).await?;
            conn.execute("DELETE FROM symbols", ()).await?;
            conn.execute("DELETE FROM imports", ()).await?;
            // Add new columns that may not exist in older schema versions.
            // Use .ok() to tolerate "duplicate column" errors on already-migrated DBs.
            conn.execute("ALTER TABLE imports ADD COLUMN resolved_file TEXT", ())
                .await
                .ok(); // ignore "duplicate column" error on fresh DBs
            conn.execute(
                "ALTER TABLE imports ADD COLUMN is_reexport INTEGER NOT NULL DEFAULT 0",
                (),
            )
            .await
            .ok(); // ignore "duplicate column" error on fresh DBs
            conn.execute("ALTER TABLE calls ADD COLUMN callee_resolved_file TEXT", ())
                .await
                .ok(); // ignore "duplicate column" error on fresh DBs
            conn.execute("ALTER TABLE calls ADD COLUMN access TEXT", ())
                .await
                .ok(); // ignore "duplicate column" error on fresh DBs
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_calls_resolved ON calls(callee_resolved_file)",
                (),
            )
            .await?;
            conn.execute("DELETE FROM type_methods", ()).await?;
            conn.execute("DELETE FROM type_refs", ()).await?;
            conn.execute("DELETE FROM symbol_attributes", ()).await?;
            conn.execute("DELETE FROM symbol_implements", ()).await?;
            // co_change_edges: clear on schema bump so the next rebuild repopulates.
            // (.ok() because this table is only created further down.)
            conn.execute("DELETE FROM co_change_edges", ()).await.ok();
            conn.execute("DELETE FROM meta WHERE key = 'co_change_last_commit'", ())
                .await
                .ok();
            // CFG tables: clear so next rebuild repopulates them.
            // (.ok() for the same reason — created further down.)
            conn.execute("DELETE FROM cfg_blocks", ()).await.ok();
            conn.execute("DELETE FROM cfg_edges", ()).await.ok();
            conn.execute("DELETE FROM cfg_defs", ()).await.ok();
            conn.execute("DELETE FROM cfg_uses", ()).await.ok();
            conn.execute("DELETE FROM cfg_effects", ()).await.ok();
            // Both diagnostic tables get dropped + recreated on every schema bump
            // (column shape has changed in past bumps and may again — simplest path).
            conn.execute("DROP TABLE IF EXISTS daemon_diagnostics", ())
                .await
                .ok();
            conn.execute("DROP TABLE IF EXISTS daemon_diagnostics_per_file", ())
                .await
                .ok();
            conn.execute(
                "INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', ?1)",
                params![SCHEMA_VERSION.to_string()],
            )
            .await?;
        }

        // Create convenience views for agent queries.
        // These are idempotent (CREATE VIEW IF NOT EXISTS) and safe to run on every open.

        // entry_points: public symbols that are never called internally.
        // Identifies API surface that external callers enter through — functions/types
        // that are exported but have no recorded callers within the indexed codebase.
        // Useful for finding dead public API candidates and top-level entry symbols.
        conn.execute(
            "CREATE VIEW IF NOT EXISTS entry_points AS
             SELECT s.file, s.name, s.kind, s.start_line, s.end_line
             FROM symbols s
             WHERE s.visibility = 'public'
               AND NOT EXISTS (
                   SELECT 1 FROM calls c WHERE c.callee_name = s.name
               )",
            (),
        )
        .await
        .ok();

        // external_deps: imports whose module specifier could not be resolved to a
        // file within the indexed root (resolved_file IS NULL). These represent
        // third-party packages, stdlib imports, or imports outside the project root.
        // Used to distinguish in-project edges from external dependencies in analysis.
        conn.execute(
            "CREATE VIEW IF NOT EXISTS external_deps AS
             SELECT file, module, name, alias, line
             FROM imports
             WHERE resolved_file IS NULL",
            (),
        )
        .await
        .ok();

        // external_surface: public symbols that are called by files whose own imports
        // include at least one unresolved (external) dependency.
        // Identifies the boundary between internal implementation and externally-facing
        // API — the symbols that external-dependency-using files actually invoke.
        conn.execute(
            "CREATE VIEW IF NOT EXISTS external_surface AS
             SELECT DISTINCT s.file, s.name, s.kind, s.start_line, s.end_line
             FROM symbols s
             WHERE s.visibility = 'public'
               AND EXISTS (
                   SELECT 1 FROM calls c
                   WHERE c.callee_name = s.name
                     AND EXISTS (
                         SELECT 1 FROM external_deps ed WHERE ed.file = c.caller_file
                     )
               )",
            (),
        )
        .await
        .ok();

        // Co-change edges: file pairs that appear together in commits.
        // Populated by rebuild_co_change_edges(); queried by coupling-clusters.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS co_change_edges (
                file_a TEXT NOT NULL,
                file_b TEXT NOT NULL,
                count INTEGER NOT NULL,
                PRIMARY KEY (file_a, file_b)
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_co_change_file_a ON co_change_edges(file_a)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_co_change_file_b ON co_change_edges(file_b)",
            (),
        )
        .await?;

        // CFG blocks, edges, defs, and uses for control-flow analysis.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS cfg_blocks (
                id INTEGER PRIMARY KEY,
                file TEXT NOT NULL,
                function_qname TEXT NOT NULL,
                function_start_line INTEGER NOT NULL,
                block_id INTEGER NOT NULL,
                kind TEXT NOT NULL,
                byte_start INTEGER NOT NULL,
                byte_end INTEGER NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                UNIQUE(file, function_qname, function_start_line, block_id)
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_cfg_blocks_file ON cfg_blocks(file)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_cfg_blocks_func ON cfg_blocks(file, function_qname, function_start_line)",
            (),
        )
        .await?;

        conn.execute(
            "CREATE TABLE IF NOT EXISTS cfg_edges (
                id INTEGER PRIMARY KEY,
                file TEXT NOT NULL,
                function_qname TEXT NOT NULL,
                function_start_line INTEGER NOT NULL,
                from_block INTEGER NOT NULL,
                to_block INTEGER NOT NULL,
                kind TEXT NOT NULL,
                exception_type TEXT
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_cfg_edges_func ON cfg_edges(file, function_qname, function_start_line)",
            (),
        )
        .await?;

        conn.execute(
            "CREATE TABLE IF NOT EXISTS cfg_defs (
                id INTEGER PRIMARY KEY,
                file TEXT NOT NULL,
                function_qname TEXT NOT NULL,
                function_start_line INTEGER NOT NULL,
                block_id INTEGER NOT NULL,
                name TEXT NOT NULL,
                byte_offset INTEGER NOT NULL,
                line INTEGER NOT NULL
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_cfg_defs_func ON cfg_defs(file, function_qname, function_start_line)",
            (),
        )
        .await?;

        conn.execute(
            "CREATE TABLE IF NOT EXISTS cfg_uses (
                id INTEGER PRIMARY KEY,
                file TEXT NOT NULL,
                function_qname TEXT NOT NULL,
                function_start_line INTEGER NOT NULL,
                block_id INTEGER NOT NULL,
                name TEXT NOT NULL,
                byte_offset INTEGER NOT NULL,
                line INTEGER NOT NULL
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_cfg_uses_func ON cfg_uses(file, function_qname, function_start_line)",
            (),
        )
        .await?;

        conn.execute(
            "CREATE TABLE IF NOT EXISTS cfg_effects (
                id INTEGER PRIMARY KEY,
                file TEXT NOT NULL,
                function_qname TEXT NOT NULL,
                function_start_line INTEGER NOT NULL,
                block_id INTEGER NOT NULL,
                kind TEXT NOT NULL,
                byte_offset INTEGER NOT NULL,
                line INTEGER NOT NULL,
                label TEXT
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_cfg_effects_func ON cfg_effects(file, function_qname, function_start_line)",
            (),
        )
        .await?;

        // Daemon diagnostics cache: one row per engine. `config_hash` mismatch on load = cache miss.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS daemon_diagnostics (
                engine TEXT PRIMARY KEY,
                issues_blob BLOB NOT NULL,
                config_hash TEXT NOT NULL,
                updated_at INTEGER NOT NULL
            )",
            (),
        )
        .await?;

        // Per-file diagnostics cache: one row per file that currently has issues.
        // "No row" semantics — files with zero issues are absent from the table.
        // Used by the daemon to serve per-file `RunRules` queries directly without
        // touching the "all" blob.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS daemon_diagnostics_per_file (
                path TEXT PRIMARY KEY,
                issues_blob BLOB NOT NULL,
                config_hash TEXT NOT NULL,
                updated_at INTEGER NOT NULL
            )",
            (),
        )
        .await?;

        // Open CA cache (best-effort — a failure here is non-fatal)
        let ca_cache = match crate::ca_cache::CaCache::open(
            &crate::ca_cache::CaCache::default_path(),
            1024 * 1024 * 1024, // 1 GiB limit
        ) {
            Ok(c) => {
                // GC stale versions at startup (best-effort)
                if let Err(e) = c.gc_stale_versions(EXTRACTOR_VERSION) {
                    tracing::warn!("normalize-facts: CA cache GC error: {}", e);
                }
                Some(c)
            }
            Err(e) => {
                tracing::warn!("normalize-facts: failed to open CA cache: {}", e);
                None
            }
        };

        Ok(Self {
            conn,
            db,
            root: root.to_path_buf(),
            progress: false,
            ca_cache,
        })
    }
785
    /// Enable progress bar output for long-running operations (refresh, call graph).
    /// Only shows bars when stderr is a terminal.
    ///
    /// NOTE(review): this only stores the flag — the terminal check presumably
    /// happens at the sites that draw the bars; confirm there.
    pub fn set_progress(&mut self, enabled: bool) {
        self.progress = enabled;
    }
791
    /// Get a reference to the underlying SQLite connection for direct queries
    /// against the tables and views created on open.
    pub fn connection(&self) -> &Connection {
        &self.conn
    }
796
797    /// Get files that have changed since last index
798    pub async fn get_changed_files(&self) -> Result<ChangedFiles, libsql::Error> {
799        let mut result = ChangedFiles::default();
800
801        // Get all indexed files with their mtimes
802        let mut indexed: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
803        {
804            let mut rows = self
805                .conn
806                .query("SELECT path, mtime FROM files WHERE is_dir = 0", ())
807                .await?;
808            while let Some(row) = rows.next().await? {
809                let path: String = row.get(0)?;
810                let mtime: i64 = row.get(1)?;
811                indexed.insert(path, mtime);
812            }
813        }
814
815        // Walk current filesystem
816        let walker = WalkBuilder::new(&self.root)
817            .hidden(false)
818            .git_ignore(true)
819            .git_global(true)
820            .git_exclude(true)
821            .build();
822
823        let mut seen = std::collections::HashSet::new();
824        for entry in walker.flatten() {
825            let path = entry.path();
826            if path.is_dir() {
827                continue;
828            }
829            if let Ok(rel) = path.strip_prefix(&self.root) {
830                let rel_str = rel.to_string_lossy().to_string();
831                // Skip internal directories
832                if rel_str.is_empty() || rel_str == ".git" || rel_str.starts_with(".git/") {
833                    continue;
834                }
835                seen.insert(rel_str.clone());
836
837                let current_mtime = path
838                    .metadata()
839                    .ok()
840                    .and_then(|m| m.modified().ok())
841                    .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
842                    .map(|d| d.as_secs() as i64)
843                    .unwrap_or(0);
844
845                if let Some(&indexed_mtime) = indexed.get(&rel_str) {
846                    if current_mtime > indexed_mtime {
847                        result.modified.push(rel_str);
848                    }
849                } else {
850                    result.added.push(rel_str);
851                }
852            }
853        }
854
855        // Find deleted files
856        for path in indexed.keys() {
857            if !seen.contains(path) {
858                result.deleted.push(path.clone());
859            }
860        }
861
862        Ok(result)
863    }
864
    /// Check if refresh is needed using fast heuristics.
    /// Returns true if changes are likely.
    ///
    /// Three tiers, cheapest first:
    /// 1. `meta.last_indexed` missing, zero, or unreadable → refresh.
    /// 2. Indexed less than 60 seconds ago → skip (debounce for bursts of
    ///    CLI invocations).
    /// 3. Otherwise compare mtimes of top-level directory entries and a
    ///    random sample of ~100 indexed files against `last_indexed`; any
    ///    newer mtime → refresh.
    ///
    /// Heuristic by design: a change to an unsampled, nested file can be
    /// missed until a later call or a full refresh.
    async fn needs_refresh(&self) -> bool {
        // If the meta table can't even be queried, assume the index is
        // unusable and force a refresh.
        let mut rows = match self
            .conn
            .query(
                "SELECT CAST(value AS INTEGER) FROM meta WHERE key = 'last_indexed'",
                (),
            )
            .await
        {
            Ok(r) => r,
            Err(_) => return true,
        };
        // Missing row or decode failure both collapse to 0 (= never indexed).
        let last_indexed: i64 = match rows.next().await {
            Ok(Some(row)) => row.get(0).unwrap_or(0),
            _ => 0,
        };

        // Never indexed
        if last_indexed == 0 {
            return true;
        }

        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map(|d| d.as_secs() as i64)
            .unwrap_or(0);

        // Allow 60s staleness - don't check on every call
        if now - last_indexed < 60 {
            return false;
        }

        // Check mtimes of top-level entries (catches new/deleted files)
        if let Ok(entries) = std::fs::read_dir(&self.root) {
            for entry in entries.flatten() {
                let name = entry.file_name();
                let name_str = name.to_string_lossy();
                // Dotfiles (.git, .cache, ...) are skipped; they churn
                // constantly and are not part of the indexed tree.
                if name_str.starts_with('.') {
                    continue;
                }
                if let Ok(meta) = entry.metadata()
                    && let Ok(mtime) = meta.modified()
                {
                    let mtime_secs = mtime
                        .duration_since(UNIX_EPOCH)
                        .map(|d| d.as_secs() as i64)
                        .unwrap_or(0);
                    if mtime_secs > last_indexed {
                        return true;
                    }
                }
            }
        }

        // Sample some indexed files to catch modifications
        // Check ~100 files spread across the index
        if let Ok(mut rows) = self
            .conn
            .query(
                "SELECT path, mtime FROM files WHERE is_dir = 0 ORDER BY RANDOM() LIMIT 100",
                (),
            )
            .await
        {
            // Any row-level failure just skips that row — this whole pass is
            // best-effort.
            while let Ok(Some(row)) = rows.next().await {
                let path: String = match row.get(0) {
                    Ok(p) => p,
                    Err(_) => continue,
                };
                let indexed_mtime: i64 = match row.get(1) {
                    Ok(m) => m,
                    Err(_) => continue,
                };
                // Paths are stored root-relative; rejoin before statting.
                let full_path = self.root.join(&path);
                if let Ok(meta) = full_path.metadata()
                    && let Ok(mtime) = meta.modified()
                {
                    let current_mtime = mtime
                        .duration_since(UNIX_EPOCH)
                        .map(|d| d.as_secs() as i64)
                        .unwrap_or(0);
                    if current_mtime > indexed_mtime {
                        return true;
                    }
                }
            }
        }

        false
    }
957
958    /// Test/maintenance helper: clear the `last_indexed` meta value so the next
959    /// `needs_refresh()` returns `true` regardless of the 60-second debounce.
960    ///
961    /// Used by integration tests that need to force refresh after each file
962    /// edit without waiting for the staleness window.
963    pub async fn invalidate_last_indexed(&self) -> Result<(), libsql::Error> {
964        self.conn
965            .execute("DELETE FROM meta WHERE key = 'last_indexed'", ())
966            .await?;
967        Ok(())
968    }
969
970    /// Refresh only files that have changed (faster than full refresh).
971    /// Returns the list of changed file paths (absolute) that were added, modified, or deleted.
972    /// The count can be derived from `.len()`.
973    pub async fn incremental_refresh(&mut self) -> Result<Vec<PathBuf>, libsql::Error> {
974        if !self.needs_refresh().await {
975            return Ok(Vec::new());
976        }
977        self.incremental_refresh_force().await
978    }
979
980    /// Refresh only files that have changed, bypassing the `needs_refresh()`
981    /// staleness gate.
982    ///
983    /// `incremental_refresh()` short-circuits if the index was refreshed within
984    /// the last 60 seconds and no top-level mtime changes are visible — a cheap
985    /// "probably nothing changed" heuristic for cold-CLI callers running many
986    /// commands in quick succession. For an event-driven daemon, the watcher
987    /// firing **is** the signal that something changed, so the gate is wrong.
988    /// Daemons should call this variant.
989    pub async fn incremental_refresh_force(&mut self) -> Result<Vec<PathBuf>, libsql::Error> {
990        let changed = self.get_changed_files().await?;
991        let total_changes = changed.added.len() + changed.modified.len() + changed.deleted.len();
992
993        if total_changes == 0 {
994            return Ok(Vec::new());
995        }
996
997        self.conn.execute("BEGIN", ()).await?;
998
999        // Delete removed files
1000        for path in &changed.deleted {
1001            self.conn
1002                .execute("DELETE FROM files WHERE path = ?1", params![path.clone()])
1003                .await?;
1004        }
1005
1006        // Update/insert changed files
1007        for path in changed.added.iter().chain(changed.modified.iter()) {
1008            let full_path = self.root.join(path);
1009            let is_dir = full_path.is_dir();
1010            let mtime = full_path
1011                .metadata()
1012                .ok()
1013                .and_then(|m| m.modified().ok())
1014                .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
1015                .map(|d| d.as_secs() as i64)
1016                .unwrap_or(0);
1017            // Count lines for text files (binary files will fail read_to_string and get 0)
1018            let lines = if is_dir {
1019                0
1020            } else {
1021                std::fs::read_to_string(&full_path)
1022                    .map(|s| s.lines().count())
1023                    .unwrap_or(0)
1024            };
1025
1026            self.conn.execute(
1027                "INSERT OR REPLACE INTO files (path, is_dir, mtime, lines) VALUES (?1, ?2, ?3, ?4)",
1028                params![path.clone(), is_dir as i64, mtime, lines as i64],
1029            ).await?;
1030        }
1031
1032        // Update last indexed time
1033        let now = SystemTime::now()
1034            .duration_since(UNIX_EPOCH)
1035            .map(|d| d.as_secs() as i64)
1036            .unwrap_or(0);
1037        self.conn
1038            .execute(
1039                "INSERT OR REPLACE INTO meta (key, value) VALUES ('last_indexed', ?1)",
1040                params![now.to_string()],
1041            )
1042            .await?;
1043
1044        self.conn.execute("COMMIT", ()).await?;
1045
1046        // Collect all changed paths as absolute PathBufs
1047        let all_changed: Vec<PathBuf> = changed
1048            .added
1049            .iter()
1050            .chain(changed.modified.iter())
1051            .chain(changed.deleted.iter())
1052            .map(|p| self.root.join(p))
1053            .collect();
1054
1055        Ok(all_changed)
1056    }
1057
    /// Execute a raw SQL statement (for maintenance operations).
    ///
    /// Thin passthrough to the underlying connection; returns the driver's
    /// affected-row count. No transaction management is done here — callers
    /// own BEGIN/COMMIT if they need atomicity.
    pub async fn execute(&self, sql: &str) -> Result<u64, libsql::Error> {
        self.conn.execute(sql, ()).await
    }
1062
1063    /// Run an arbitrary read-only SQL query and return results as a list of row maps.
1064    ///
1065    /// Each row is a `serde_json::Map` from column name to value.
1066    /// Useful for agent-driven exploration of the structural index.
1067    pub async fn raw_query(
1068        &self,
1069        sql: &str,
1070    ) -> Result<Vec<serde_json::Map<String, serde_json::Value>>, libsql::Error> {
1071        let mut rows = self.conn.query(sql, ()).await?;
1072        let mut result = Vec::new();
1073        while let Some(row) = rows.next().await? {
1074            let col_count = row.column_count();
1075            let mut map = serde_json::Map::new();
1076            for i in 0..col_count {
1077                let col_name = row.column_name(i).unwrap_or("?").to_string();
1078                let value = match row.get_value(i)? {
1079                    libsql::Value::Null => serde_json::Value::Null,
1080                    libsql::Value::Integer(n) => serde_json::Value::Number(n.into()),
1081                    libsql::Value::Real(f) => serde_json::json!(f),
1082                    libsql::Value::Text(s) => serde_json::Value::String(s),
1083                    libsql::Value::Blob(b) => {
1084                        serde_json::Value::String(format!("<blob {} bytes>", b.len()))
1085                    }
1086                };
1087                map.insert(col_name, value);
1088            }
1089            result.push(map);
1090        }
1091        Ok(result)
1092    }
1093
1094    /// Refresh the index by walking the filesystem
1095    pub async fn refresh(&mut self) -> Result<usize, libsql::Error> {
1096        let walker = WalkBuilder::new(&self.root)
1097            .hidden(false)
1098            .git_ignore(true)
1099            .git_global(true)
1100            .git_exclude(true)
1101            .build();
1102
1103        self.conn.execute("BEGIN", ()).await?;
1104
1105        // Clear existing files
1106        self.conn.execute("DELETE FROM files", ()).await?;
1107
1108        let pb = if self.progress && std::io::IsTerminal::is_terminal(&std::io::stderr()) {
1109            let pb = ProgressBar::new_spinner();
1110            pb.set_style(
1111                ProgressStyle::with_template("{spinner:.cyan} {msg} [{elapsed_precise}]")
1112                    .unwrap_or_else(|_| ProgressStyle::default_spinner()),
1113            );
1114            pb.set_message("Scanning files...");
1115            pb
1116        } else {
1117            ProgressBar::hidden()
1118        };
1119
1120        let mut count = 0;
1121        for entry in walker.flatten() {
1122            let path = entry.path();
1123            if let Ok(rel) = path.strip_prefix(&self.root) {
1124                let rel_str = rel.to_string_lossy().to_string();
1125                // Skip internal directories
1126                if rel_str.is_empty() || rel_str == ".git" || rel_str.starts_with(".git/") {
1127                    continue;
1128                }
1129
1130                let is_dir = path.is_dir();
1131                let mtime = path
1132                    .metadata()
1133                    .ok()
1134                    .and_then(|m| m.modified().ok())
1135                    .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
1136                    .map(|d| d.as_secs() as i64)
1137                    .unwrap_or(0);
1138                // Count lines for text files (binary files will fail read_to_string and get 0)
1139                let lines = if is_dir {
1140                    0
1141                } else {
1142                    std::fs::read_to_string(path)
1143                        .map(|s| s.lines().count())
1144                        .unwrap_or(0)
1145                };
1146
1147                self.conn
1148                    .execute(
1149                        "INSERT INTO files (path, is_dir, mtime, lines) VALUES (?1, ?2, ?3, ?4)",
1150                        params![rel_str, is_dir as i64, mtime, lines as i64],
1151                    )
1152                    .await?;
1153                count += 1;
1154                pb.set_message(format!("Scanning files... {count}"));
1155                pb.tick();
1156            }
1157        }
1158
1159        pb.finish_and_clear();
1160
1161        // Update last indexed time
1162        let now = SystemTime::now()
1163            .duration_since(UNIX_EPOCH)
1164            .map(|d| d.as_secs() as i64)
1165            .unwrap_or(0);
1166        self.conn
1167            .execute(
1168                "INSERT OR REPLACE INTO meta (key, value) VALUES ('last_indexed', ?1)",
1169                params![now.to_string()],
1170            )
1171            .await?;
1172
1173        self.conn.execute("COMMIT", ()).await?;
1174
1175        Ok(count)
1176    }
1177
1178    /// Get all files from the index
1179    pub async fn all_files(&self) -> Result<Vec<IndexedFile>, libsql::Error> {
1180        let mut rows = self
1181            .conn
1182            .query("SELECT path, is_dir, mtime, lines FROM files", ())
1183            .await?;
1184        let mut files = Vec::new();
1185        while let Some(row) = rows.next().await? {
1186            files.push(IndexedFile {
1187                path: row.get(0)?,
1188                is_dir: row.get::<i64>(1)? != 0,
1189                mtime: row.get(2)?,
1190                lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1191            });
1192        }
1193        Ok(files)
1194    }
1195
1196    /// Search files by exact name match
1197    pub async fn find_by_name(&self, name: &str) -> Result<Vec<IndexedFile>, libsql::Error> {
1198        let pattern = format!("%/{}", name);
1199        let mut rows = self
1200            .conn
1201            .query(
1202                "SELECT path, is_dir, mtime, lines FROM files WHERE path LIKE ?1 OR path = ?2",
1203                params![pattern, name],
1204            )
1205            .await?;
1206        let mut files = Vec::new();
1207        while let Some(row) = rows.next().await? {
1208            files.push(IndexedFile {
1209                path: row.get(0)?,
1210                is_dir: row.get::<i64>(1)? != 0,
1211                mtime: row.get(2)?,
1212                lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1213            });
1214        }
1215        Ok(files)
1216    }
1217
1218    /// Search files by stem (filename without extension)
1219    pub async fn find_by_stem(&self, stem: &str) -> Result<Vec<IndexedFile>, libsql::Error> {
1220        let pattern = format!("%/{}%", stem);
1221        let mut rows = self
1222            .conn
1223            .query(
1224                "SELECT path, is_dir, mtime, lines FROM files WHERE path LIKE ?1",
1225                params![pattern],
1226            )
1227            .await?;
1228        let mut files = Vec::new();
1229        while let Some(row) = rows.next().await? {
1230            files.push(IndexedFile {
1231                path: row.get(0)?,
1232                is_dir: row.get::<i64>(1)? != 0,
1233                mtime: row.get(2)?,
1234                lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1235            });
1236        }
1237        Ok(files)
1238    }
1239
1240    /// Count indexed files
1241    pub async fn count(&self) -> Result<usize, libsql::Error> {
1242        let mut rows = self.conn.query("SELECT COUNT(*) FROM files", ()).await?;
1243        if let Some(row) = rows.next().await? {
1244            Ok(u64::try_from(row.get::<i64>(0)?).unwrap_or(0) as usize)
1245        } else {
1246            Ok(0)
1247        }
1248    }
1249
1250    /// Index symbols and call graph for a file
1251    #[allow(dead_code)] // FileIndex API - used by daemon
1252    pub async fn index_file_symbols(
1253        &self,
1254        path: &str,
1255        symbols: &[FlatSymbol],
1256        calls: &[(String, String, usize)],
1257    ) -> Result<(), libsql::Error> {
1258        // Insert symbols
1259        for sym in symbols {
1260            self.conn.execute(
1261                "INSERT INTO symbols (file, name, kind, start_line, end_line, parent, visibility, is_impl) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
1262                params![path.to_string(), sym.name.clone(), sym.kind.as_str(), sym.start_line as i64, sym.end_line as i64, sym.parent.clone(), sym.visibility.as_str(), sym.is_interface_impl as i64],
1263            ).await?;
1264            for attr in &sym.attributes {
1265                self.conn
1266                    .execute(
1267                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
1268                        params![path.to_string(), sym.name.clone(), attr.clone()],
1269                    )
1270                    .await?;
1271            }
1272            if let Some(doc) = &sym.docstring {
1273                self.conn
1274                    .execute(
1275                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
1276                        params![path.to_string(), sym.name.clone(), format!("doc:{doc}")],
1277                    )
1278                    .await?;
1279            }
1280            for iface in &sym.implements {
1281                self.conn
1282                    .execute(
1283                        "INSERT INTO symbol_implements (file, name, interface) VALUES (?1, ?2, ?3)",
1284                        params![path.to_string(), sym.name.clone(), iface.clone()],
1285                    )
1286                    .await?;
1287            }
1288        }
1289
1290        // Insert calls (caller_symbol, callee_name, line)
1291        for (caller_symbol, callee_name, line) in calls {
1292            self.conn.execute(
1293                "INSERT INTO calls (caller_file, caller_symbol, callee_name, line) VALUES (?1, ?2, ?3, ?4)",
1294                params![path.to_string(), caller_symbol.clone(), callee_name.clone(), *line as i64],
1295            ).await?;
1296        }
1297
1298        Ok(())
1299    }
1300
    /// Find callers of a specific symbol definition (from call graph).
    ///
    /// `def_file` is the file that contains the definition being searched. Results are
    /// restricted to files that are `def_file` itself (self-recursive calls) or that
    /// explicitly import the symbol. This prevents false positives from unrelated
    /// functions with the same name in other modules.
    ///
    /// Resolves through imports: if file A imports X as Y and calls Y(), it is found
    /// as a caller of X. Also handles qualified calls (`foo.bar()`) and `self.method()`
    /// resolved to the containing class.
    ///
    /// Returns `(caller_file, caller_symbol, line, access)` tuples.
    pub async fn find_callers(
        &self,
        symbol_name: &str,
        def_file: &str,
    ) -> Result<Vec<(String, String, usize, Option<String>)>, libsql::Error> {
        // Handle Class.method format - split and search for method within class
        // splitn(2, '.') guarantees exactly two parts when a '.' is present,
        // so parts[1] keeps any further dots inside the method part.
        let (class_filter, method_name) = if symbol_name.contains('.') {
            let parts: Vec<&str> = symbol_name.splitn(2, '.').collect();
            (Some(parts[0]), parts[1])
        } else {
            (None, symbol_name)
        };

        // If searching for Class.method, find callers that call self.method within that class
        if let Some(class_name) = class_filter {
            let mut rows = self
                .conn
                .query(
                    "SELECT c.caller_file, c.caller_symbol, c.line, c.access
                 FROM calls c
                 JOIN symbols s ON c.caller_file = s.file AND c.caller_symbol = s.name
                 WHERE c.callee_name = ?1 AND c.callee_qualifier = 'self' AND s.parent = ?2",
                    params![method_name, class_name],
                )
                .await?;
            let mut callers = Vec::new();
            while let Some(row) = rows.next().await? {
                callers.push((
                    row.get(0)?,
                    row.get(1)?,
                    u64::try_from(row.get::<i64>(2)?).unwrap_or(0) as usize,
                    row.get::<Option<String>>(3)?,
                ));
            }

            // Empty result falls through to the general query below with just
            // the method part, rather than returning early with nothing.
            if !callers.is_empty() {
                return Ok(callers);
            }
        }

        // Use callee_resolved_file when available for precise call resolution.
        // Falls back to import-based matching when callee_resolved_file is NULL
        // (external packages, unresolved modules).
        //
        // Branch 1: callee_resolved_file = def_file (precise match)
        // Branch 2: Same-file calls (caller_file = def_file, no qualifier)
        // Branch 3: Import-based fallback for unresolved calls (callee_resolved_file IS NULL)
        // Branch 4: self.method() calls within a class
        // UNION (not UNION ALL) deduplicates rows matched by several branches.
        let mut rows = self.conn.query(
            "SELECT caller_file, caller_symbol, line, access FROM calls
             WHERE callee_name = ?1 AND callee_resolved_file = ?2
             UNION
             SELECT caller_file, caller_symbol, line, access FROM calls
             WHERE callee_name = ?1 AND caller_file = ?2
               AND callee_resolved_file IS NULL AND callee_qualifier IS NULL
             UNION
             SELECT c.caller_file, c.caller_symbol, c.line, c.access
             FROM calls c
             JOIN imports i ON c.caller_file = i.file AND c.callee_name = COALESCE(i.alias, i.name)
             WHERE i.name = ?1 AND c.callee_resolved_file IS NULL
               AND (i.resolved_file = ?2 OR i.resolved_file IS NULL)
             UNION
             SELECT c.caller_file, c.caller_symbol, c.line, c.access
             FROM calls c
             JOIN imports i ON c.caller_file = i.file AND c.callee_qualifier = COALESCE(i.alias, i.name)
             WHERE c.callee_name = ?1 AND i.module IS NULL AND c.callee_resolved_file IS NULL
               AND (i.resolved_file = ?2 OR i.resolved_file IS NULL)
             UNION
             SELECT c.caller_file, c.caller_symbol, c.line, c.access
             FROM calls c
             JOIN symbols s ON c.caller_file = s.file AND c.caller_symbol = s.name
             WHERE c.callee_name = ?1 AND c.callee_qualifier = 'self'
               AND s.parent IS NOT NULL AND c.callee_resolved_file IS NULL",
            params![method_name, def_file],
        ).await?;
        let mut callers = Vec::new();
        while let Some(row) = rows.next().await? {
            callers.push((
                row.get(0)?,
                row.get(1)?,
                u64::try_from(row.get::<i64>(2)?).unwrap_or(0) as usize,
                row.get::<Option<String>>(3)?,
            ));
        }

        Ok(callers)
    }
1398
1399    /// Find callees of a symbol (what it calls)
1400    pub async fn find_callees(
1401        &self,
1402        file: &str,
1403        symbol_name: &str,
1404    ) -> Result<Vec<(String, usize, Option<String>)>, libsql::Error> {
1405        let mut rows = self
1406            .conn
1407            .query(
1408                "SELECT callee_name, line, access FROM calls WHERE caller_file = ?1 AND caller_symbol = ?2",
1409                params![file, symbol_name],
1410            )
1411            .await?;
1412        let mut callees = Vec::new();
1413        while let Some(row) = rows.next().await? {
1414            callees.push((
1415                row.get(0)?,
1416                u64::try_from(row.get::<i64>(1)?).unwrap_or(0) as usize,
1417                row.get::<Option<String>>(2)?,
1418            ));
1419        }
1420        Ok(callees)
1421    }
1422
1423    /// Find callees with their resolved definition file.
1424    ///
1425    /// Returns `(callee_name, line, Option<def_file>)` where `def_file` is the
1426    /// root-relative path of the file that defines the callee, resolved via the
1427    /// imports table's `resolved_file` column. `None` means the callee is locally
1428    /// defined, external, or could not be resolved.
1429    pub async fn find_callees_resolved(
1430        &self,
1431        file: &str,
1432        symbol_name: &str,
1433    ) -> Result<Vec<(String, usize, Option<String>)>, libsql::Error> {
1434        let mut rows = self
1435            .conn
1436            .query(
1437                "SELECT c.callee_name, c.line, i.resolved_file
1438                 FROM calls c
1439                 LEFT JOIN imports i
1440                   ON c.caller_file = i.file
1441                   AND c.callee_name = COALESCE(i.alias, i.name)
1442                 WHERE c.caller_file = ?1 AND c.caller_symbol = ?2",
1443                params![file, symbol_name],
1444            )
1445            .await?;
1446        let mut callees = Vec::new();
1447        while let Some(row) = rows.next().await? {
1448            callees.push((
1449                row.get(0)?,
1450                u64::try_from(row.get::<i64>(1)?).unwrap_or(0) as usize,
1451                row.get::<Option<String>>(2)?,
1452            ));
1453        }
1454        Ok(callees)
1455    }
1456
1457    /// Find a symbol by name
1458    pub async fn find_symbol(
1459        &self,
1460        name: &str,
1461    ) -> Result<Vec<(String, String, usize, usize)>, libsql::Error> {
1462        let mut rows = self
1463            .conn
1464            .query(
1465                "SELECT file, kind, start_line, end_line FROM symbols WHERE name = ?1",
1466                params![name],
1467            )
1468            .await?;
1469        let mut symbols = Vec::new();
1470        while let Some(row) = rows.next().await? {
1471            symbols.push((
1472                row.get(0)?,
1473                row.get(1)?,
1474                u64::try_from(row.get::<i64>(2)?).unwrap_or(0) as usize,
1475                u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1476            ));
1477        }
1478        Ok(symbols)
1479    }
1480
1481    /// Get all distinct symbol names as a HashSet.
1482    pub async fn all_symbol_names(
1483        &self,
1484    ) -> Result<std::collections::HashSet<String>, libsql::Error> {
1485        let mut rows = self
1486            .conn
1487            .query("SELECT DISTINCT name FROM symbols", ())
1488            .await?;
1489        let mut names = std::collections::HashSet::new();
1490        while let Some(row) = rows.next().await? {
1491            names.insert(row.get(0)?);
1492        }
1493        Ok(names)
1494    }
1495
1496    /// Find symbols by name with fuzzy matching, optional kind filter, and limit
1497    pub async fn find_symbols(
1498        &self,
1499        query: &str,
1500        kind: Option<&str>,
1501        fuzzy: bool,
1502        limit: usize,
1503    ) -> Result<Vec<SymbolMatch>, libsql::Error> {
1504        let query_lower = query.to_lowercase();
1505        let prefix_pattern = format!("{}%", query_lower);
1506        let limit_i64 = i64::try_from(limit).unwrap_or(i64::MAX);
1507
1508        let mut symbols = Vec::new();
1509
1510        if fuzzy {
1511            let pattern = format!("%{}%", query_lower);
1512            let mut rows = if let Some(k) = kind {
1513                self.conn
1514                    .query(
1515                        "SELECT name, kind, file, start_line, end_line, parent FROM symbols
1516                     WHERE LOWER(name) LIKE ?1 AND kind = ?2
1517                     ORDER BY
1518                       CASE WHEN LOWER(name) = ?3 THEN 0
1519                            WHEN LOWER(name) LIKE ?4 THEN 1
1520                            ELSE 2 END,
1521                       LENGTH(name), name
1522                     LIMIT ?5",
1523                        params![pattern, k, query_lower, prefix_pattern, limit_i64],
1524                    )
1525                    .await?
1526            } else {
1527                self.conn
1528                    .query(
1529                        "SELECT name, kind, file, start_line, end_line, parent FROM symbols
1530                     WHERE LOWER(name) LIKE ?1
1531                     ORDER BY
1532                       CASE WHEN LOWER(name) = ?2 THEN 0
1533                            WHEN LOWER(name) LIKE ?3 THEN 1
1534                            ELSE 2 END,
1535                       LENGTH(name), name
1536                     LIMIT ?4",
1537                        params![pattern, query_lower, prefix_pattern, limit_i64],
1538                    )
1539                    .await?
1540            };
1541
1542            while let Some(row) = rows.next().await? {
1543                symbols.push(SymbolMatch {
1544                    name: row.get(0)?,
1545                    kind: row.get(1)?,
1546                    file: row.get(2)?,
1547                    start_line: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1548                    end_line: u64::try_from(row.get::<i64>(4)?).unwrap_or(0) as usize,
1549                    parent: row.get(5)?,
1550                });
1551            }
1552        } else {
1553            // Exact match
1554            let mut rows = if let Some(k) = kind {
1555                self.conn
1556                    .query(
1557                        "SELECT name, kind, file, start_line, end_line, parent FROM symbols
1558                     WHERE LOWER(name) = LOWER(?1) AND kind = ?2
1559                     LIMIT ?3",
1560                        params![query, k, limit_i64],
1561                    )
1562                    .await?
1563            } else {
1564                self.conn
1565                    .query(
1566                        "SELECT name, kind, file, start_line, end_line, parent FROM symbols
1567                     WHERE LOWER(name) = LOWER(?1)
1568                     LIMIT ?2",
1569                        params![query, limit_i64],
1570                    )
1571                    .await?
1572            };
1573
1574            while let Some(row) = rows.next().await? {
1575                symbols.push(SymbolMatch {
1576                    name: row.get(0)?,
1577                    kind: row.get(1)?,
1578                    file: row.get(2)?,
1579                    start_line: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1580                    end_line: u64::try_from(row.get::<i64>(4)?).unwrap_or(0) as usize,
1581                    parent: row.get(5)?,
1582                });
1583            }
1584        }
1585
1586        Ok(symbols)
1587    }
1588
1589    /// Get call graph stats
1590    pub async fn call_graph_stats(&self) -> Result<CallGraphStats, libsql::Error> {
1591        let symbols = {
1592            let mut rows = self.conn.query("SELECT COUNT(*) FROM symbols", ()).await?;
1593            if let Some(row) = rows.next().await? {
1594                u64::try_from(row.get::<i64>(0)?).unwrap_or(0) as usize
1595            } else {
1596                0
1597            }
1598        };
1599        let calls = {
1600            let mut rows = self.conn.query("SELECT COUNT(*) FROM calls", ()).await?;
1601            if let Some(row) = rows.next().await? {
1602                u64::try_from(row.get::<i64>(0)?).unwrap_or(0) as usize
1603            } else {
1604                0
1605            }
1606        };
1607        let imports = {
1608            let mut rows = self.conn.query("SELECT COUNT(*) FROM imports", ()).await?;
1609            if let Some(row) = rows.next().await? {
1610                u64::try_from(row.get::<i64>(0)?).unwrap_or(0) as usize
1611            } else {
1612                0
1613            }
1614        };
1615        Ok(CallGraphStats {
1616            symbols,
1617            calls,
1618            imports,
1619        })
1620    }
1621
1622    /// Load all call edges from the calls table.
1623    /// Returns Vec<(caller_file, caller_symbol, callee_name)>.
1624    /// Used by test-gaps analysis for bulk caller lookup.
1625    pub async fn all_call_edges(&self) -> Result<Vec<(String, String, String)>, libsql::Error> {
1626        let mut rows = self
1627            .conn
1628            .query(
1629                "SELECT caller_file, caller_symbol, callee_name FROM calls",
1630                (),
1631            )
1632            .await?;
1633        let mut edges = Vec::new();
1634        while let Some(row) = rows.next().await? {
1635            edges.push((row.get(0)?, row.get(1)?, row.get(2)?));
1636        }
1637        Ok(edges)
1638    }
1639
1640    /// Load all imports from the imports table.
1641    /// Returns Vec<(file, module, name, line)>.
1642    /// Used by rules for building relations.
1643    pub async fn all_imports(&self) -> Result<Vec<(String, String, String, u32)>, libsql::Error> {
1644        let mut rows = self
1645            .conn
1646            .query("SELECT file, module, name, line FROM imports", ())
1647            .await?;
1648        let mut imports = Vec::new();
1649        while let Some(row) = rows.next().await? {
1650            // module can be NULL in some cases
1651            let module: Option<String> = row.get(1).ok();
1652            imports.push((
1653                row.get(0)?,
1654                module.unwrap_or_default(),
1655                row.get(2)?,
1656                u32::try_from(row.get::<i64>(3)?).unwrap_or(0),
1657            ));
1658        }
1659        Ok(imports)
1660    }
1661
1662    /// Load all resolved import edges from the imports table.
1663    /// Returns Vec<(importer_file, imported_file)> for rows where `resolved_file IS NOT NULL`.
1664    /// The paths are root-relative strings as stored in the database.
1665    /// Used by the daemon to build the reverse-dep graph on startup.
1666    pub async fn all_resolved_import_edges(&self) -> Result<Vec<(String, String)>, libsql::Error> {
1667        let mut rows = self
1668            .conn
1669            .query(
1670                "SELECT file, resolved_file FROM imports WHERE resolved_file IS NOT NULL",
1671                (),
1672            )
1673            .await?;
1674        let mut edges = Vec::new();
1675        while let Some(row) = rows.next().await? {
1676            edges.push((row.get(0)?, row.get(1)?));
1677        }
1678        Ok(edges)
1679    }
1680
1681    /// Load all resolved import edges with line numbers.
1682    /// Returns `Vec<(importer_file, line, resolved_file)>` for rows where
1683    /// `resolved_file IS NOT NULL`. Used by the boundary-violations native rule
1684    /// to check cross-boundary imports with precise source locations.
1685    pub async fn all_resolved_imports_with_lines(
1686        &self,
1687    ) -> Result<Vec<(String, u32, String)>, libsql::Error> {
1688        let mut rows = self
1689            .conn
1690            .query(
1691                "SELECT file, line, resolved_file FROM imports WHERE resolved_file IS NOT NULL",
1692                (),
1693            )
1694            .await?;
1695        let mut edges = Vec::new();
1696        while let Some(row) = rows.next().await? {
1697            let line = u32::try_from(row.get::<i64>(1)?).unwrap_or(0);
1698            edges.push((row.get(0)?, line, row.get(2)?));
1699        }
1700        Ok(edges)
1701    }
1702
1703    /// Count distinct resolved import targets per file (fan-out).
1704    /// Returns `Vec<(file, count)>` ordered by count descending.
1705    /// Only counts rows where `resolved_file IS NOT NULL`.
1706    /// Used by the `high-fan-out` native rule.
1707    pub async fn import_fan_out_by_file(&self) -> Result<Vec<(String, usize)>, libsql::Error> {
1708        let mut rows = self
1709            .conn
1710            .query(
1711                "SELECT file, COUNT(DISTINCT resolved_file) AS cnt \
1712                 FROM imports WHERE resolved_file IS NOT NULL \
1713                 GROUP BY file ORDER BY cnt DESC",
1714                (),
1715            )
1716            .await?;
1717        let mut result = Vec::new();
1718        while let Some(row) = rows.next().await? {
1719            let count = usize::try_from(row.get::<i64>(1)?).unwrap_or(0);
1720            result.push((row.get(0)?, count));
1721        }
1722        Ok(result)
1723    }
1724
1725    /// Count distinct files that import each file (fan-in).
1726    /// Returns `Vec<(file, count)>` ordered by count descending.
1727    /// Only counts rows where `resolved_file IS NOT NULL`.
1728    /// Used by the `high-fan-in` native rule.
1729    pub async fn import_fan_in_by_file(&self) -> Result<Vec<(String, usize)>, libsql::Error> {
1730        let mut rows = self
1731            .conn
1732            .query(
1733                "SELECT resolved_file, COUNT(DISTINCT file) AS cnt \
1734                 FROM imports WHERE resolved_file IS NOT NULL \
1735                 GROUP BY resolved_file ORDER BY cnt DESC",
1736                (),
1737            )
1738            .await?;
1739        let mut result = Vec::new();
1740        while let Some(row) = rows.next().await? {
1741            let count = usize::try_from(row.get::<i64>(1)?).unwrap_or(0);
1742            result.push((row.get(0)?, count));
1743        }
1744        Ok(result)
1745    }
1746
1747    /// Load resolved import edges for a specific importer file (root-relative path).
1748    /// Returns Vec<imported_file> where `resolved_file IS NOT NULL`.
1749    /// Used by the daemon to update outgoing edges for a changed file.
1750    pub async fn resolved_imports_for_file(
1751        &self,
1752        file: &str,
1753    ) -> Result<Vec<String>, libsql::Error> {
1754        let mut rows = self
1755            .conn
1756            .query(
1757                "SELECT resolved_file FROM imports WHERE file = ?1 AND resolved_file IS NOT NULL",
1758                params![file.to_string()],
1759            )
1760            .await?;
1761        let mut targets = Vec::new();
1762        while let Some(row) = rows.next().await? {
1763            targets.push(row.get(0)?);
1764        }
1765        Ok(targets)
1766    }
1767
1768    /// Find the shortest import path(s) from `from` to `to` via BFS over the resolved import graph.
1769    ///
1770    /// `from` and `to` are root-relative path strings (as stored in the DB).
1771    /// Returns all shortest paths (there may be more than one of equal length).
1772    /// If `all_paths` is true, returns all simple paths up to `path_limit` paths
1773    /// and up to `max_depth` hops deep.
1774    /// Returns an empty vec if no path exists.
1775    pub async fn find_import_path(
1776        &self,
1777        from: &str,
1778        to: &str,
1779        all_paths: bool,
1780        path_limit: usize,
1781        max_depth: usize,
1782    ) -> Result<Vec<Vec<String>>, libsql::Error> {
1783        use std::collections::{HashMap, HashSet, VecDeque};
1784
1785        if from == to {
1786            return Ok(vec![vec![from.to_string()]]);
1787        }
1788
1789        // Build adjacency list: file -> set of files it imports
1790        let mut adj: HashMap<String, Vec<String>> = HashMap::new();
1791        let mut rows = self
1792            .conn
1793            .query(
1794                "SELECT file, resolved_file FROM imports WHERE resolved_file IS NOT NULL",
1795                (),
1796            )
1797            .await?;
1798        while let Some(row) = rows.next().await? {
1799            let file: String = row.get(0)?;
1800            let resolved: String = row.get(1)?;
1801            adj.entry(file).or_default().push(resolved);
1802        }
1803
1804        if !all_paths {
1805            // BFS for shortest path
1806            let mut visited: HashMap<String, String> = HashMap::new(); // node -> parent
1807            let mut queue: VecDeque<String> = VecDeque::new();
1808            queue.push_back(from.to_string());
1809            visited.insert(from.to_string(), String::new());
1810
1811            let mut found = false;
1812            'bfs: while let Some(node) = queue.pop_front() {
1813                // Check depth
1814                let depth = {
1815                    let mut d = 0usize;
1816                    let mut cur = &node;
1817                    while let Some(p) = visited.get(cur) {
1818                        if p.is_empty() {
1819                            break;
1820                        }
1821                        d += 1;
1822                        cur = p;
1823                        if d > max_depth {
1824                            break;
1825                        }
1826                    }
1827                    d
1828                };
1829                if depth >= max_depth {
1830                    continue;
1831                }
1832                if let Some(neighbors) = adj.get(&node) {
1833                    for neighbor in neighbors {
1834                        if !visited.contains_key(neighbor.as_str()) {
1835                            visited.insert(neighbor.clone(), node.clone());
1836                            if neighbor == to {
1837                                found = true;
1838                                break 'bfs;
1839                            }
1840                            queue.push_back(neighbor.clone());
1841                        }
1842                    }
1843                }
1844            }
1845
1846            if !found {
1847                return Ok(vec![]);
1848            }
1849
1850            // Reconstruct path by backtracking through visited
1851            let mut path = vec![to.to_string()];
1852            let mut cur = to.to_string();
1853            loop {
1854                let parent = visited.get(&cur).cloned().unwrap_or_default();
1855                if parent.is_empty() {
1856                    break;
1857                }
1858                path.push(parent.clone());
1859                cur = parent;
1860            }
1861            path.reverse();
1862            Ok(vec![path])
1863        } else {
1864            // DFS to find all simple paths up to path_limit
1865            let mut result: Vec<Vec<String>> = Vec::new();
1866            let mut stack: VecDeque<(String, Vec<String>, HashSet<String>)> = VecDeque::new();
1867            let mut initial_visited = HashSet::new();
1868            initial_visited.insert(from.to_string());
1869            stack.push_back((from.to_string(), vec![from.to_string()], initial_visited));
1870
1871            while let Some((node, path, visited)) = stack.pop_back() {
1872                if result.len() >= path_limit {
1873                    break;
1874                }
1875                if path.len() > max_depth + 1 {
1876                    continue;
1877                }
1878                if let Some(neighbors) = adj.get(&node) {
1879                    for neighbor in neighbors {
1880                        if visited.contains(neighbor.as_str()) {
1881                            continue;
1882                        }
1883                        let mut new_path = path.clone();
1884                        new_path.push(neighbor.clone());
1885                        if neighbor == to {
1886                            result.push(new_path);
1887                            if result.len() >= path_limit {
1888                                break;
1889                            }
1890                        } else {
1891                            let mut new_visited = visited.clone();
1892                            new_visited.insert(neighbor.clone());
1893                            stack.push_back((neighbor.clone(), new_path, new_visited));
1894                        }
1895                    }
1896                }
1897            }
1898
1899            Ok(result)
1900        }
1901    }
1902
1903    /// Load all symbol implements from the symbol_implements table.
1904    /// Returns Vec<(file, name, interface)>.
1905    pub async fn all_symbol_implements(
1906        &self,
1907    ) -> Result<Vec<(String, String, String)>, libsql::Error> {
1908        let mut rows = self
1909            .conn
1910            .query("SELECT file, name, interface FROM symbol_implements", ())
1911            .await?;
1912        let mut implements = Vec::new();
1913        while let Some(row) = rows.next().await? {
1914            implements.push((row.get(0)?, row.get(1)?, row.get(2)?));
1915        }
1916        Ok(implements)
1917    }
1918
1919    /// Load all type methods from the type_methods table.
1920    /// Returns Vec<(file, type_name, method_name)>.
1921    pub async fn all_type_methods(&self) -> Result<Vec<(String, String, String)>, libsql::Error> {
1922        let mut rows = self
1923            .conn
1924            .query("SELECT file, type_name, method_name FROM type_methods", ())
1925            .await?;
1926        let mut methods = Vec::new();
1927        while let Some(row) = rows.next().await? {
1928            methods.push((row.get(0)?, row.get(1)?, row.get(2)?));
1929        }
1930        Ok(methods)
1931    }
1932
1933    /// Load all calls with line numbers.
1934    /// Returns Vec<(caller_file, caller_symbol, callee_name, line)>.
1935    /// Used by rules for building relations.
1936    pub async fn all_calls_with_lines(
1937        &self,
1938    ) -> Result<Vec<(String, String, String, u32)>, libsql::Error> {
1939        let mut rows = self
1940            .conn
1941            .query(
1942                "SELECT caller_file, caller_symbol, callee_name, line FROM calls",
1943                (),
1944            )
1945            .await?;
1946        let mut calls = Vec::new();
1947        while let Some(row) = rows.next().await? {
1948            calls.push((
1949                row.get(0)?,
1950                row.get(1)?,
1951                row.get(2)?,
1952                u32::try_from(row.get::<i64>(3)?).unwrap_or(0),
1953            ));
1954        }
1955        Ok(calls)
1956    }
1957
1958    /// Load all symbols from the symbols table with full details.
1959    /// Returns Vec<(file, name, kind, start_line, end_line, parent, visibility, is_impl)>.
1960    /// Used by test-gaps analysis to classify test context.
1961    pub async fn all_symbols_with_details(
1962        &self,
1963    ) -> Result<
1964        Vec<(
1965            String,
1966            String,
1967            String,
1968            usize,
1969            usize,
1970            Option<String>,
1971            String,
1972            bool,
1973        )>,
1974        libsql::Error,
1975    > {
1976        let mut rows = self
1977            .conn
1978            .query(
1979                "SELECT file, name, kind, start_line, end_line, parent, visibility, is_impl FROM symbols",
1980                (),
1981            )
1982            .await?;
1983        let mut symbols = Vec::new();
1984        while let Some(row) = rows.next().await? {
1985            symbols.push((
1986                row.get(0)?,
1987                row.get(1)?,
1988                row.get(2)?,
1989                u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1990                u64::try_from(row.get::<i64>(4)?).unwrap_or(0) as usize,
1991                row.get(5).ok(),
1992                row.get::<String>(6)
1993                    .unwrap_or_else(|_| "public".to_string()),
1994                row.get::<i64>(7).unwrap_or(0) != 0,
1995            ));
1996        }
1997        Ok(symbols)
1998    }
1999
2000    /// Load all symbol attributes from the symbol_attributes table.
2001    /// Returns Vec<(file, name, attribute)>.
2002    pub async fn all_symbol_attributes(
2003        &self,
2004    ) -> Result<Vec<(String, String, String)>, libsql::Error> {
2005        let mut rows = self
2006            .conn
2007            .query("SELECT file, name, attribute FROM symbol_attributes", ())
2008            .await?;
2009        let mut attrs = Vec::new();
2010        while let Some(row) = rows.next().await? {
2011            attrs.push((row.get(0)?, row.get(1)?, row.get(2)?));
2012        }
2013        Ok(attrs)
2014    }
2015
2016    /// Load all calls with qualifiers.
2017    /// Returns Vec<(caller_file, caller_symbol, callee_name, callee_qualifier, line)>.
2018    pub async fn all_calls_with_qualifiers(
2019        &self,
2020    ) -> Result<Vec<(String, String, String, Option<String>, u32)>, libsql::Error> {
2021        let mut rows = self
2022            .conn
2023            .query(
2024                "SELECT caller_file, caller_symbol, callee_name, callee_qualifier, line FROM calls",
2025                (),
2026            )
2027            .await?;
2028        let mut calls = Vec::new();
2029        while let Some(row) = rows.next().await? {
2030            calls.push((
2031                row.get(0)?,
2032                row.get(1)?,
2033                row.get(2)?,
2034                row.get(3).ok(),
2035                u32::try_from(row.get::<i64>(4)?).unwrap_or(0),
2036            ));
2037        }
2038        Ok(calls)
2039    }
2040
    /// Query all CFG edge facts from the index.
    /// Returns tuples of (file, function_qname, function_start_line, from_block, to_block, kind, exception_type).
    /// `exception_type` comes back as the empty string when the column is NULL
    /// (NULL marks a conservative exception edge — see `CfgEdgeRow`).
    pub async fn all_cfg_edges(
        &self,
    ) -> Result<Vec<(String, String, u32, u32, u32, String, String)>, libsql::Error> {
        let mut rows = self
            .conn
            .query(
                "SELECT file, function_qname, function_start_line, from_block, to_block, kind, COALESCE(exception_type, '') FROM cfg_edges",
                (),
            )
            .await?;
        let mut edges = Vec::new();
        while let Some(row) = rows.next().await? {
            edges.push((
                row.get::<String>(0)?,
                row.get::<String>(1)?,
                // Integer columns are stored as i64; out-of-range values clamp to 0.
                u32::try_from(row.get::<i64>(2)?).unwrap_or(0),
                u32::try_from(row.get::<i64>(3)?).unwrap_or(0),
                u32::try_from(row.get::<i64>(4)?).unwrap_or(0),
                row.get::<String>(5)?,
                row.get::<String>(6)?,
            ));
        }
        Ok(edges)
    }
2068
2069    pub async fn all_cfg_effects(
2070        &self,
2071    ) -> Result<Vec<(String, String, u32, u32, String, u32, String)>, libsql::Error> {
2072        let mut rows = self
2073            .conn
2074            .query(
2075                "SELECT file, function_qname, function_start_line, block_id, kind, line, COALESCE(label, '') FROM cfg_effects",
2076                (),
2077            )
2078            .await?;
2079        let mut effects = Vec::new();
2080        while let Some(row) = rows.next().await? {
2081            effects.push((
2082                row.get::<String>(0)?,
2083                row.get::<String>(1)?,
2084                u32::try_from(row.get::<i64>(2)?).unwrap_or(0),
2085                u32::try_from(row.get::<i64>(3)?).unwrap_or(0),
2086                row.get::<String>(4)?,
2087                u32::try_from(row.get::<i64>(5)?).unwrap_or(0),
2088                row.get::<String>(6)?,
2089            ));
2090        }
2091        Ok(effects)
2092    }
2093
2094    /// Convert a module name to possible file paths using the language's trait method.
2095    /// Returns only paths that exist in the index.
2096    pub async fn module_to_files(&self, module: &str, source_file: &str) -> Vec<String> {
2097        // Get language from the source file extension
2098        let lang = match support_for_path(Path::new(source_file)) {
2099            Some(l) => l,
2100            None => return vec![],
2101        };
2102
2103        // Get local deps implementation for this language
2104        let deps = match normalize_local_deps::registry::deps_for_language(lang.name()) {
2105            Some(d) => d,
2106            None => return vec![],
2107        };
2108
2109        // First try resolve_local_import which handles crate::, super::, self:: properly
2110        let source_path = self.root.join(source_file);
2111        if let Some(resolved) = deps.resolve_local_import(module, &source_path, &self.root) {
2112            // Convert absolute path back to relative path for index lookup
2113            if let Ok(rel_path) = resolved.strip_prefix(&self.root) {
2114                let rel_str = rel_path.to_string_lossy().to_string();
2115                // Verify it exists in index
2116                if let Ok(mut rows) = self
2117                    .conn
2118                    .query(
2119                        "SELECT 1 FROM files WHERE path = ?1",
2120                        params![rel_str.clone()],
2121                    )
2122                    .await
2123                    && rows.next().await.ok().flatten().is_some()
2124                {
2125                    return vec![rel_str];
2126                }
2127            }
2128        }
2129
2130        // Fall back to module_name_to_paths for simpler lookups
2131        let candidates = deps.module_name_to_paths(module);
2132
2133        // Filter to files that exist in index
2134        let mut result = Vec::new();
2135        for path in candidates {
2136            let mut rows = match self
2137                .conn
2138                .query("SELECT 1 FROM files WHERE path = ?1", params![path.clone()])
2139                .await
2140            {
2141                Ok(r) => r,
2142                Err(_) => continue,
2143            };
2144            if rows.next().await.ok().flatten().is_some() {
2145                result.push(path);
2146            }
2147        }
2148        result
2149    }
2150
2151    /// Resolve all unresolved import rows by populating `resolved_file`.
2152    ///
2153    /// For each import row where `module IS NOT NULL` and `resolved_file IS NULL`,
2154    /// calls `module_to_files()` to convert the module specifier to a project-relative
2155    /// file path and writes it back. Rows that cannot be resolved (external packages,
2156    /// stdlib, unknown modules) keep `resolved_file = NULL`.
2157    ///
2158    /// Safe to call multiple times — only processes rows with `resolved_file IS NULL`.
2159    pub async fn resolve_all_imports(&self) -> Result<usize, libsql::Error> {
2160        // Collect distinct (file, module) pairs that still need resolution.
2161        // We can't mutate while iterating, so collect first.
2162        let mut rows = self
2163            .conn
2164            .query(
2165                "SELECT DISTINCT file, module FROM imports WHERE module IS NOT NULL AND resolved_file IS NULL",
2166                (),
2167            )
2168            .await?;
2169        let mut pending: Vec<(String, String)> = Vec::new();
2170        while let Some(row) = rows.next().await? {
2171            pending.push((row.get(0)?, row.get(1)?));
2172        }
2173
2174        let mut resolved_count = 0;
2175        for (file, module) in pending {
2176            let files = self.module_to_files(&module, &file).await;
2177            if let Some(resolved_file) = files.first() {
2178                self.conn
2179                    .execute(
2180                        "UPDATE imports SET resolved_file = ?1 WHERE file = ?2 AND module = ?3 AND resolved_file IS NULL",
2181                        params![resolved_file.clone(), file.clone(), module.clone()],
2182                    )
2183                    .await?;
2184                resolved_count += 1;
2185            }
2186        }
2187        Ok(resolved_count)
2188    }
2189
    /// Resolve import specifiers using per-language `ModuleResolver` implementations.
    ///
    /// Runs after `resolve_all_imports` as a second pass: for any import row that still
    /// has `resolved_file IS NULL`, look up the language's `ModuleResolver` and call
    /// `resolve()` directly. Updates `resolved_file` on successful resolutions.
    ///
    /// Uses the workspace root to build `ResolverConfig` once per language, then
    /// resolves all pending imports for that language's files.
    ///
    /// Returns the number of imports it resolved in this pass.
    pub async fn resolve_imports_via_module_resolver(&self) -> Result<usize, libsql::Error> {
        use normalize_languages::{ImportSpec, Resolution, support_for_path};
        use std::collections::HashMap;

        // Collect pending imports: (file, module, name)
        let mut rows = self
            .conn
            .query(
                "SELECT file, module, name FROM imports WHERE module IS NOT NULL AND resolved_file IS NULL",
                (),
            )
            .await?;
        let mut pending: Vec<(String, String, String)> = Vec::new();
        while let Some(row) = rows.next().await? {
            // `module` is filtered to NOT NULL in SQL, but read defensively anyway.
            let module: Option<String> = row.get(1)?;
            if let Some(module) = module {
                pending.push((row.get(0)?, module, row.get(2)?));
            }
        }

        // Nothing to do — avoid building resolver configs for no work.
        if pending.is_empty() {
            return Ok(0);
        }

        // Build resolver configs keyed by language name (cache per workspace)
        let mut resolver_configs: HashMap<&'static str, normalize_languages::ResolverConfig> =
            HashMap::new();

        let mut resolved_count = 0usize;
        for (file_str, module_str, name_str) in &pending {
            let file_path = self.root.join(file_str);
            // Skip files whose extension maps to no supported language.
            let lang = match support_for_path(&file_path) {
                Some(l) => l,
                None => continue,
            };
            // Languages without a ModuleResolver are only handled by the first pass.
            let resolver = match lang.module_resolver() {
                Some(r) => r,
                None => continue,
            };

            // Build (or reuse) this language's workspace config.
            let cfg = resolver_configs
                .entry(lang.name())
                .or_insert_with(|| resolver.workspace_config(&self.root));

            // A name of "*" denotes a glob import; it carries no specific names.
            let spec = ImportSpec {
                raw: module_str.clone(),
                is_relative: module_str.starts_with('.'),
                names: if name_str == "*" {
                    Vec::new()
                } else {
                    vec![name_str.clone()]
                },
                is_glob: name_str == "*",
            };

            if let Resolution::Resolved(resolved_path, _) = resolver.resolve(&file_path, &spec, cfg)
            {
                // Convert absolute resolved path to root-relative string
                let resolved_rel = resolved_path
                    .strip_prefix(&self.root)
                    .unwrap_or(&resolved_path)
                    .to_string_lossy()
                    .to_string();

                self.conn
                    .execute(
                        "UPDATE imports SET resolved_file = ?1 WHERE file = ?2 AND module = ?3 AND name = ?4 AND resolved_file IS NULL",
                        libsql::params![resolved_rel, file_str.clone(), module_str.clone(), name_str.clone()],
                    )
                    .await?;
                resolved_count += 1;
            }
        }

        Ok(resolved_count)
    }
2274
    /// Follow re-export chains to resolve imports to their ultimate source file.
    ///
    /// When file A imports `Foo` from file B, but file B re-exports `Foo` from file C
    /// (via `pub use c::Foo` in Rust or `export { Foo } from './c'` in TypeScript),
    /// this updates A's import row so `resolved_file` points to C instead of B.
    ///
    /// Runs iteratively (up to `max_depth` passes) to handle chains longer than one hop,
    /// stopping early when no rows are updated. Wildcard re-exports (`pub use mod::*`)
    /// are handled by following any re-export from the intermediate file.
    ///
    /// Returns the total number of import rows updated across all passes.
    pub async fn trace_reexports(&self) -> Result<usize, libsql::Error> {
        // Hard cap on chain length; also bounds the work if a re-export cycle
        // keeps producing updates.
        let max_depth = 10usize;
        let mut total_updated = 0usize;

        for _ in 0..max_depth {
            // For each import row whose resolved_file re-exports the imported name
            // (or re-exports via wildcard), update resolved_file to point to the
            // re-export's own resolved_file (the ultimate source).
            //
            // A re-export in file B for name N means: imports row where
            //   file = B, name = N (or name = '*'), is_reexport = 1, resolved_file IS NOT NULL
            //
            // We look for imports in A where:
            //   resolved_file = B  AND  B has a matching re-export row with its own resolved_file
            let updated = self
                .conn
                .execute(
                    "UPDATE imports AS consumer
                     SET resolved_file = (
                         SELECT reexp.resolved_file
                         FROM imports AS reexp
                         WHERE reexp.file = consumer.resolved_file
                           AND reexp.is_reexport = 1
                           AND reexp.resolved_file IS NOT NULL
                           AND reexp.resolved_file != consumer.resolved_file
                           AND (
                               reexp.name = consumer.name
                               OR COALESCE(reexp.alias, reexp.name) = consumer.name
                               OR reexp.name = '*'
                           )
                         LIMIT 1
                     )
                     WHERE consumer.resolved_file IS NOT NULL
                       AND EXISTS (
                           SELECT 1 FROM imports AS reexp2
                           WHERE reexp2.file = consumer.resolved_file
                             AND reexp2.is_reexport = 1
                             AND reexp2.resolved_file IS NOT NULL
                             AND reexp2.resolved_file != consumer.resolved_file
                             AND (
                                 reexp2.name = consumer.name
                                 OR COALESCE(reexp2.alias, reexp2.name) = consumer.name
                                 OR reexp2.name = '*'
                             )
                       )",
                    (),
                )
                .await? as usize;

            total_updated += updated;
            // Fixed point reached: no row changed this pass, so later passes
            // would not change anything either.
            if updated == 0 {
                break;
            }
        }

        Ok(total_updated)
    }
2341
2342    /// Resolve call targets: for each call, try to determine which file defines the callee.
2343    ///
2344    /// Uses the import graph: if caller_file imports a name that matches callee_name (or its alias),
2345    /// and that import has a resolved_file, set callee_resolved_file on the call row.
2346    /// Same-file calls (caller_file has a symbol matching callee_name) also get resolved.
2347    pub async fn resolve_all_calls(&self) -> Result<usize, libsql::Error> {
2348        let mut resolved = 0usize;
2349
2350        // 1. Same-file calls: callee defined in the same file as the caller
2351        resolved += self
2352            .conn
2353            .execute(
2354                "UPDATE calls SET callee_resolved_file = caller_file
2355                 WHERE callee_resolved_file IS NULL
2356                   AND callee_qualifier IS NULL
2357                   AND EXISTS (
2358                       SELECT 1 FROM symbols
2359                       WHERE symbols.file = calls.caller_file
2360                         AND symbols.name = calls.callee_name
2361                   )",
2362                (),
2363            )
2364            .await? as usize;
2365
2366        // 2. Import-resolved calls: callee_name matches an import name (or alias)
2367        //    that has a resolved_file
2368        resolved += self
2369            .conn
2370            .execute(
2371                "UPDATE calls SET callee_resolved_file = (
2372                     SELECT i.resolved_file FROM imports i
2373                     WHERE i.file = calls.caller_file
2374                       AND calls.callee_name = COALESCE(i.alias, i.name)
2375                       AND i.resolved_file IS NOT NULL
2376                     LIMIT 1
2377                 )
2378                 WHERE callee_resolved_file IS NULL
2379                   AND callee_qualifier IS NULL
2380                   AND EXISTS (
2381                       SELECT 1 FROM imports i
2382                       WHERE i.file = calls.caller_file
2383                         AND calls.callee_name = COALESCE(i.alias, i.name)
2384                         AND i.resolved_file IS NOT NULL
2385                   )",
2386                (),
2387            )
2388            .await? as usize;
2389
2390        // 3. Qualifier-resolved calls: callee_qualifier matches an import name (or alias)
2391        //    e.g., `module.foo()` where `module` is imported
2392        resolved += self
2393            .conn
2394            .execute(
2395                "UPDATE calls SET callee_resolved_file = (
2396                     SELECT i.resolved_file FROM imports i
2397                     WHERE i.file = calls.caller_file
2398                       AND calls.callee_qualifier = COALESCE(i.alias, i.name)
2399                       AND i.resolved_file IS NOT NULL
2400                     LIMIT 1
2401                 )
2402                 WHERE callee_resolved_file IS NULL
2403                   AND callee_qualifier IS NOT NULL
2404                   AND callee_qualifier != 'self'
2405                   AND EXISTS (
2406                       SELECT 1 FROM imports i
2407                       WHERE i.file = calls.caller_file
2408                         AND calls.callee_qualifier = COALESCE(i.alias, i.name)
2409                         AND i.resolved_file IS NOT NULL
2410                   )",
2411                (),
2412            )
2413            .await? as usize;
2414
2415        // 4. Self-calls: `self.method()` — resolve to the file containing the parent type
2416        //    The caller's parent type is in the same file, so resolve to caller_file.
2417        resolved += self
2418            .conn
2419            .execute(
2420                "UPDATE calls SET callee_resolved_file = caller_file
2421                 WHERE callee_resolved_file IS NULL
2422                   AND callee_qualifier = 'self'",
2423                (),
2424            )
2425            .await? as usize;
2426
2427        Ok(resolved)
2428    }
2429
2430    /// Check if a file exports (defines) a given symbol
2431    async fn file_exports_symbol(&self, file: &str, symbol: &str) -> Result<bool, libsql::Error> {
2432        // Check if symbol is defined in this file (top-level only, parent IS NULL)
2433        let mut rows = self
2434            .conn
2435            .query(
2436                "SELECT COUNT(*) FROM symbols WHERE file = ?1 AND name = ?2 AND parent IS NULL",
2437                params![file, symbol],
2438            )
2439            .await?;
2440        if let Some(row) = rows.next().await? {
2441            let count: i64 = row.get(0)?;
2442            Ok(count > 0)
2443        } else {
2444            Ok(false)
2445        }
2446    }
2447
2448    /// Resolve a name in a file's context to its source module
2449    /// Returns: (source_module, original_name) if found
2450    pub async fn resolve_import(
2451        &self,
2452        file: &str,
2453        name: &str,
2454    ) -> Result<Option<(String, String)>, libsql::Error> {
2455        // Check for direct import or alias
2456        let mut rows = self
2457            .conn
2458            .query(
2459                "SELECT module, name FROM imports WHERE file = ?1 AND (name = ?2 OR alias = ?2)",
2460                params![file, name],
2461            )
2462            .await?;
2463
2464        if let Some(row) = rows.next().await? {
2465            let module: Option<String> = row.get(0)?;
2466            let orig_name: String = row.get(1)?;
2467            if let Some(module) = module {
2468                return Ok(Some((module, orig_name)));
2469            } else {
2470                // Plain import (import X), module is the name
2471                return Ok(Some((orig_name.clone(), orig_name)));
2472            }
2473        }
2474
2475        // Check for wildcard imports - name could come from any of them
2476        let mut rows = self
2477            .conn
2478            .query(
2479                "SELECT module FROM imports WHERE file = ?1 AND name = '*'",
2480                params![file],
2481            )
2482            .await?;
2483        let mut wildcards = Vec::new();
2484        while let Some(row) = rows.next().await? {
2485            if let Ok(Some(module)) = row.get::<Option<String>>(0) {
2486                wildcards.push(module);
2487            }
2488        }
2489
2490        // Check each wildcard source to see if it exports the symbol
2491        for module in &wildcards {
2492            let files = self.module_to_files(module, file).await;
2493            for module_file in files {
2494                if self.file_exports_symbol(&module_file, name).await? {
2495                    return Ok(Some((module.clone(), name.to_string())));
2496                }
2497            }
2498        }
2499
2500        // Fallback: if we have wildcards but couldn't verify, return first as possibility
2501        // This handles external modules (stdlib, third-party) we can't resolve
2502        if !wildcards.is_empty() {
2503            return Ok(Some((wildcards[0].clone(), name.to_string())));
2504        }
2505
2506        Ok(None)
2507    }
2508
2509    /// Find which files import a given module
2510    pub async fn find_importers(
2511        &self,
2512        module: &str,
2513    ) -> Result<Vec<(String, String, usize)>, libsql::Error> {
2514        let pattern = format!("{}%", module);
2515        let mut rows = self
2516            .conn
2517            .query(
2518                "SELECT file, name, line FROM imports WHERE module = ?1 OR module LIKE ?2",
2519                params![module, pattern],
2520            )
2521            .await?;
2522        let mut importers = Vec::new();
2523        while let Some(row) = rows.next().await? {
2524            importers.push((
2525                row.get(0)?,
2526                row.get(1)?,
2527                u64::try_from(row.get::<i64>(2)?).unwrap_or(0) as usize,
2528            ));
2529        }
2530        Ok(importers)
2531    }
2532
2533    /// Check whether a file already has an import named `name` (as `name` or `alias`).
2534    /// Used for rename conflict detection.
2535    pub async fn has_import_named(&self, file: &str, name: &str) -> Result<bool, libsql::Error> {
2536        let mut rows = self
2537            .conn
2538            .query(
2539                "SELECT COUNT(*) FROM imports WHERE file = ?1 AND (name = ?2 OR alias = ?2)",
2540                params![file, name],
2541            )
2542            .await?;
2543        if let Some(row) = rows.next().await? {
2544            let count: i64 = row.get(0)?;
2545            Ok(count > 0)
2546        } else {
2547            Ok(false)
2548        }
2549    }
2550
2551    /// Find files that import a specific symbol by name.
2552    /// Returns: (file, imported_name, alias, line)
2553    /// Useful for rename: find all files that need their import statement updated.
2554    pub async fn find_symbol_importers(
2555        &self,
2556        symbol_name: &str,
2557    ) -> Result<Vec<(String, String, Option<String>, usize)>, libsql::Error> {
2558        let mut rows = self
2559            .conn
2560            .query(
2561                "SELECT file, name, alias, line FROM imports WHERE name = ?1",
2562                params![symbol_name],
2563            )
2564            .await?;
2565        let mut importers = Vec::new();
2566        while let Some(row) = rows.next().await? {
2567            importers.push((
2568                row.get(0)?,
2569                row.get(1)?,
2570                row.get(2)?,
2571                u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
2572            ));
2573        }
2574        Ok(importers)
2575    }
2576
2577    /// Find files that import a specific symbol by name, including the module path.
2578    /// Returns: (file, imported_name, alias, line, module)
2579    /// Useful for `move`: the recipe needs the original module string so it can rewrite
2580    /// it to the new path verbatim, rather than guessing where the path begins/ends.
2581    pub async fn find_symbol_importers_with_module(
2582        &self,
2583        symbol_name: &str,
2584    ) -> Result<Vec<(String, String, Option<String>, usize, Option<String>)>, libsql::Error> {
2585        let mut rows = self
2586            .conn
2587            .query(
2588                "SELECT file, name, alias, line, module FROM imports WHERE name = ?1",
2589                params![symbol_name],
2590            )
2591            .await?;
2592        let mut importers = Vec::new();
2593        while let Some(row) = rows.next().await? {
2594            importers.push((
2595                row.get(0)?,
2596                row.get(1)?,
2597                row.get(2)?,
2598                u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
2599                row.get(4)?,
2600            ));
2601        }
2602        Ok(importers)
2603    }
2604
2605    /// Get method names for a type (interface/class) in a specific file.
2606    /// Used for cross-file interface implementation detection.
2607    pub async fn get_type_methods(
2608        &self,
2609        file: &str,
2610        type_name: &str,
2611    ) -> Result<Vec<String>, libsql::Error> {
2612        let mut rows = self
2613            .conn
2614            .query(
2615                "SELECT method_name FROM type_methods WHERE file = ?1 AND type_name = ?2",
2616                params![file, type_name],
2617            )
2618            .await?;
2619        let mut methods = Vec::new();
2620        while let Some(row) = rows.next().await? {
2621            methods.push(row.get(0)?);
2622        }
2623        Ok(methods)
2624    }
2625
2626    /// Find files that define a type by name.
2627    /// Returns all files that have a type (interface/class) with the given name.
2628    pub async fn find_type_definitions(
2629        &self,
2630        type_name: &str,
2631    ) -> Result<Vec<String>, libsql::Error> {
2632        let mut rows = self
2633            .conn
2634            .query(
2635                "SELECT DISTINCT file FROM type_methods WHERE type_name = ?1",
2636                params![type_name],
2637            )
2638            .await?;
2639        let mut files = Vec::new();
2640        while let Some(row) = rows.next().await? {
2641            files.push(row.get(0)?);
2642        }
2643        Ok(files)
2644    }
2645
2646    /// Refresh the call graph by parsing all supported source files
2647    /// This is more expensive than file refresh since it parses every file
2648    /// Uses parallel processing for parsing, sequential insertion for SQLite
2649    pub async fn refresh_call_graph(&mut self) -> Result<CallGraphStats, libsql::Error> {
2650        // Get all indexed source files
2651        let files: Vec<String> = {
2652            let sql = format!(
2653                "SELECT path FROM files WHERE is_dir = 0 AND ({})",
2654                source_extensions_sql_filter()
2655            );
2656            let mut rows = self.conn.query(&sql, ()).await?;
2657            let mut files = Vec::new();
2658            while let Some(row) = rows.next().await? {
2659                let path: String = row.get(0)?;
2660                files.push(path);
2661            }
2662            files
2663        };
2664
2665        // Parse all files in parallel
2666        // Each thread gets its own SymbolParser (tree-sitter parsers have mutable state)
2667        let root = self.root.clone();
2668
2669        // Pre-pass: check CA cache for all files (serial, fast disk reads)
2670        let mut cached_data: Vec<ParsedFileData> = Vec::new();
2671        let mut uncached_files: Vec<String> = Vec::new();
2672        // Files whose symbol data came from CA cache: need CFG rebuilt separately.
2673        let mut ca_cached_files: Vec<String> = Vec::new();
2674
2675        for file_path in &files {
2676            let full_path = root.join(file_path);
2677            let bytes = match std::fs::read(&full_path) {
2678                Ok(b) => b,
2679                Err(_) => {
2680                    uncached_files.push(file_path.clone());
2681                    continue;
2682                }
2683            };
2684            let grammar = match support_for_path(&full_path) {
2685                Some(s) => s.grammar_name().to_string(),
2686                None => {
2687                    uncached_files.push(file_path.clone());
2688                    continue;
2689                }
2690            };
2691            let hash = blake3::hash(&bytes);
2692            if let Some(ca) = &self.ca_cache {
2693                match ca.get::<CachedFileData>(hash.as_bytes(), EXTRACTOR_VERSION, &grammar) {
2694                    Ok(Some(cached)) => {
2695                        ca_cached_files.push(file_path.clone());
2696                        cached_data.push(ParsedFileData {
2697                            file_path: file_path.clone(),
2698                            symbols: cached.symbols,
2699                            calls: cached.calls,
2700                            imports: cached.imports,
2701                            type_methods: cached.type_methods,
2702                            type_refs: cached.type_refs,
2703                            // CFG data is not CA-cached — always rebuilt during parse.
2704                            cfg: CfgData {
2705                                blocks: Vec::new(),
2706                                edges: Vec::new(),
2707                                defs: Vec::new(),
2708                                uses: Vec::new(),
2709                                effects: Vec::new(),
2710                            },
2711                        });
2712                        continue;
2713                    }
2714                    Ok(None) => {}
2715                    Err(e) => {
2716                        tracing::warn!("normalize-facts: CA cache get error: {}", e);
2717                    }
2718                }
2719            }
2720            uncached_files.push(file_path.clone());
2721        }
2722
2723        let ca_cache_for_rayon = self.ca_cache.clone();
2724
2725        let pb = if self.progress && std::io::IsTerminal::is_terminal(&std::io::stderr()) {
2726            let pb = ProgressBar::new(uncached_files.len() as u64);
2727            pb.set_style(
2728                ProgressStyle::with_template(
2729                    "{spinner:.cyan} Parsing symbols... [{bar:30.cyan/dim}] {pos}/{len} files [{elapsed_precise}]",
2730                )
2731                .unwrap_or_else(|_| ProgressStyle::default_bar())
2732                .progress_chars("##-"),
2733            );
2734            pb
2735        } else {
2736            ProgressBar::hidden()
2737        };
2738        let mut parsed_data: Vec<ParsedFileData> = uncached_files
2739            .par_iter()
2740            .progress_with(pb.clone())
2741            .filter_map(|file_path| {
2742                let full_path = root.join(file_path);
2743                let bytes = std::fs::read(&full_path).ok()?;
2744                let content = String::from_utf8_lossy(&bytes).into_owned();
2745
2746                let grammar = support_for_path(&full_path)
2747                    .map(|s| s.grammar_name().to_string())
2748                    .unwrap_or_default();
2749                let hash = blake3::hash(&bytes);
2750
2751                // Each thread creates its own parser
2752                let mut parser = SymbolParser::new();
2753
2754                // parse_file returns None when the grammar .so is unavailable.
2755                // In that case, skip the file entirely — don't index it as empty.
2756                // The missing grammar is already recorded in `parsers::report_missing_grammar`
2757                // (called from `parse_file` -> `try_get_grammar`), so callers can summarise.
2758                let symbols = parser.parse_file(&full_path, &content)?;
2759
2760                let mut sym_data = Vec::with_capacity(symbols.len());
2761                let mut call_data = Vec::new();
2762
2763                for sym in &symbols {
2764                    sym_data.push(ParsedSymbol {
2765                        name: sym.name.clone(),
2766                        kind: sym.kind.as_str().to_string(),
2767                        start_line: sym.start_line,
2768                        end_line: sym.end_line,
2769                        parent: sym.parent.clone(),
2770                        visibility: sym.visibility.as_str().to_string(),
2771                        attributes: sym.attributes.clone(),
2772                        is_interface_impl: sym.is_interface_impl,
2773                        implements: sym.implements.clone(),
2774                        docstring: sym.docstring.clone(),
2775                    });
2776
2777                    // Only index calls for functions/methods
2778                    let kind = sym.kind.as_str();
2779                    if kind == "function" || kind == "method" {
2780                        let calls = parser.find_callees_for_symbol(&full_path, &content, sym);
2781                        for (callee_name, line, qualifier, access) in calls {
2782                            call_data.push((
2783                                sym.name.clone(),
2784                                callee_name,
2785                                qualifier,
2786                                access,
2787                                line,
2788                            ));
2789                        }
2790                    }
2791                }
2792
2793                // Parse imports using trait-based extraction (works for all supported languages)
2794                let imports = parser.parse_imports(&full_path, &content);
2795
2796                // Extract type methods for cross-file interface resolution
2797                // We need to use the full symbol extraction to get hierarchy
2798                let extractor = crate::extract::Extractor::new();
2799                let extract_result = extractor.extract(&full_path, &content);
2800                let mut type_methods = Vec::new();
2801                for sym in &extract_result.symbols {
2802                    if matches!(
2803                        sym.kind,
2804                        normalize_languages::SymbolKind::Interface
2805                            | normalize_languages::SymbolKind::Class
2806                            | normalize_languages::SymbolKind::Trait
2807                            | normalize_languages::SymbolKind::Struct
2808                    ) {
2809                        for child in &sym.children {
2810                            if matches!(
2811                                child.kind,
2812                                normalize_languages::SymbolKind::Method
2813                                    | normalize_languages::SymbolKind::Function
2814                            ) {
2815                                type_methods.push((sym.name.clone(), child.name.clone()));
2816                            }
2817                        }
2818                    }
2819                }
2820
2821                // Extract type references using tree-sitter queries
2822                let type_refs = parser.find_type_refs(&full_path, &content);
2823
2824                // Build CFGs for function/method symbols (best-effort — errors are non-fatal).
2825                let cfg = build_cfg_data_for_file(&full_path, &bytes, grammar.as_str(), &symbols);
2826
2827                // Store result in CA cache (best-effort).
2828                // Grammar availability is already guaranteed above (parse_file returned Some),
2829                // so empty results here are legitimate and safe to cache.
2830                if !grammar.is_empty()
2831                    && let Some(ca) = &ca_cache_for_rayon
2832                {
2833                    let cached = CachedFileData {
2834                        symbols: sym_data
2835                            .iter()
2836                            .map(|s| ParsedSymbol {
2837                                name: s.name.clone(),
2838                                kind: s.kind.clone(),
2839                                start_line: s.start_line,
2840                                end_line: s.end_line,
2841                                parent: s.parent.clone(),
2842                                visibility: s.visibility.clone(),
2843                                attributes: s.attributes.clone(),
2844                                is_interface_impl: s.is_interface_impl,
2845                                implements: s.implements.clone(),
2846                                docstring: s.docstring.clone(),
2847                            })
2848                            .collect(),
2849                        calls: call_data.clone(),
2850                        imports: imports.clone(),
2851                        type_methods: type_methods.clone(),
2852                        type_refs: type_refs.clone(),
2853                    };
2854                    if let Err(e) = ca.put(hash.as_bytes(), EXTRACTOR_VERSION, &grammar, &cached) {
2855                        tracing::warn!("normalize-facts: CA cache put error: {}", e);
2856                    }
2857                }
2858
2859                Some(ParsedFileData {
2860                    file_path: file_path.clone(),
2861                    symbols: sym_data,
2862                    calls: call_data,
2863                    imports,
2864                    type_methods,
2865                    type_refs,
2866                    cfg,
2867                })
2868            })
2869            .collect();
2870
2871        // Merge CA-cached results
2872        parsed_data.extend(cached_data);
2873
2874        // Build CFG data for files that came from the CA cache (their cfg vecs are empty).
2875        if !ca_cached_files.is_empty() {
2876            // For each cached file, rebuild CFG data using a fresh parser (re-reads the file).
2877            let cfg_updates: Vec<(String, CfgData)> = ca_cached_files
2878                .par_iter()
2879                .filter_map(|file_path| {
2880                    let full_path = root.join(file_path);
2881                    let bytes = std::fs::read(&full_path).ok()?;
2882                    let lang_support = support_for_path(&full_path)?;
2883                    let grammar_name = lang_support.grammar_name();
2884                    let symbols: Vec<FlatSymbol> = {
2885                        let p = SymbolParser::new();
2886                        let content = String::from_utf8_lossy(&bytes).into_owned();
2887                        p.parse_file(&full_path, &content)?
2888                    };
2889                    let cfg = build_cfg_data_for_file(&full_path, &bytes, grammar_name, &symbols);
2890                    Some((file_path.clone(), cfg))
2891                })
2892                .collect();
2893            // Patch cfg into parsed_data.
2894            for (fpath, cfg) in cfg_updates {
2895                if let Some(data) = parsed_data.iter_mut().find(|d| d.file_path == fpath) {
2896                    data.cfg = cfg;
2897                }
2898            }
2899        }
2900
2901        pb.finish_and_clear();
2902
2903        let pb_insert = if self.progress && std::io::IsTerminal::is_terminal(&std::io::stderr()) {
2904            let pb = ProgressBar::new(parsed_data.len() as u64);
2905            pb.set_style(
2906                ProgressStyle::with_template(
2907                    "{spinner:.cyan} Storing index... [{bar:30.cyan/dim}] {pos}/{len} files [{elapsed_precise}]",
2908                )
2909                .unwrap_or_else(|_| ProgressStyle::default_bar())
2910                .progress_chars("##-"),
2911            );
2912            pb
2913        } else {
2914            ProgressBar::hidden()
2915        };
2916
2917        self.conn.execute("BEGIN", ()).await?;
2918
2919        // Clear existing data
2920        self.conn.execute("DELETE FROM symbols", ()).await?;
2921        self.conn.execute("DELETE FROM calls", ()).await?;
2922        self.conn.execute("DELETE FROM imports", ()).await?;
2923        self.conn.execute("DELETE FROM type_methods", ()).await?;
2924        self.conn.execute("DELETE FROM type_refs", ()).await?;
2925        self.conn
2926            .execute("DELETE FROM symbol_attributes", ())
2927            .await?;
2928        self.conn
2929            .execute("DELETE FROM symbol_implements", ())
2930            .await?;
2931        self.conn.execute("DELETE FROM cfg_blocks", ()).await?;
2932        self.conn.execute("DELETE FROM cfg_edges", ()).await?;
2933        self.conn.execute("DELETE FROM cfg_defs", ()).await?;
2934        self.conn.execute("DELETE FROM cfg_uses", ()).await?;
2935        self.conn.execute("DELETE FROM cfg_effects", ()).await?;
2936
2937        let mut symbol_count = 0;
2938        let mut call_count = 0;
2939        let mut import_count = 0;
2940
2941        for data in &parsed_data {
2942            for sym in &data.symbols {
2943                self.conn.execute(
2944                    "INSERT INTO symbols (file, name, kind, start_line, end_line, parent, visibility, is_impl) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
2945                    params![data.file_path.clone(), sym.name.clone(), sym.kind.clone(), sym.start_line as i64, sym.end_line as i64, sym.parent.clone(), sym.visibility.clone(), sym.is_interface_impl as i64],
2946                ).await?;
2947                for attr in &sym.attributes {
2948                    self.conn.execute(
2949                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
2950                        params![data.file_path.clone(), sym.name.clone(), attr.clone()],
2951                    ).await?;
2952                }
2953                if let Some(doc) = &sym.docstring {
2954                    self.conn.execute(
2955                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
2956                        params![data.file_path.clone(), sym.name.clone(), format!("doc:{doc}")],
2957                    ).await?;
2958                }
2959                for iface in &sym.implements {
2960                    self.conn.execute(
2961                        "INSERT INTO symbol_implements (file, name, interface) VALUES (?1, ?2, ?3)",
2962                        params![data.file_path.clone(), sym.name.clone(), iface.clone()],
2963                    ).await?;
2964                }
2965                symbol_count += 1;
2966            }
2967
2968            for (caller_symbol, callee_name, qualifier, access, line) in &data.calls {
2969                self.conn.execute(
2970                    "INSERT INTO calls (caller_file, caller_symbol, callee_name, callee_qualifier, access, line) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
2971                    params![data.file_path.clone(), caller_symbol.clone(), callee_name.clone(), qualifier.clone(), access.clone(), *line as i64],
2972                ).await?;
2973                call_count += 1;
2974            }
2975
2976            for imp in &data.imports {
2977                self.conn.execute(
2978                    "INSERT INTO imports (file, module, name, alias, line, is_reexport) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
2979                    params![data.file_path.clone(), imp.module.clone(), imp.name.clone(), imp.alias.clone(), imp.line as i64, imp.is_reexport as i64],
2980                ).await?;
2981                import_count += 1;
2982            }
2983
2984            for (type_name, method_name) in &data.type_methods {
2985                self.conn.execute(
2986                    "INSERT OR IGNORE INTO type_methods (file, type_name, method_name) VALUES (?1, ?2, ?3)",
2987                    params![data.file_path.clone(), type_name.clone(), method_name.clone()],
2988                ).await?;
2989            }
2990
2991            for tr in &data.type_refs {
2992                self.conn.execute(
2993                    "INSERT INTO type_refs (file, source_symbol, target_type, kind, line) VALUES (?1, ?2, ?3, ?4, ?5)",
2994                    params![data.file_path.clone(), tr.source_symbol.clone(), tr.target_type.clone(), tr.kind.as_str(), tr.line as i64],
2995                ).await?;
2996            }
2997
2998            // Insert CFG blocks
2999            for blk in &data.cfg.blocks {
3000                self.conn.execute(
3001                    "INSERT OR IGNORE INTO cfg_blocks (file, function_qname, function_start_line, block_id, kind, byte_start, byte_end, start_line, end_line) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
3002                    params![
3003                        data.file_path.clone(),
3004                        blk.function_qname.clone(),
3005                        blk.function_start_line as i64,
3006                        blk.block_id as i64,
3007                        blk.kind.clone(),
3008                        blk.byte_start as i64,
3009                        blk.byte_end as i64,
3010                        blk.start_line as i64,
3011                        blk.end_line as i64,
3012                    ],
3013                ).await?;
3014            }
3015            // Insert CFG edges
3016            for edge in &data.cfg.edges {
3017                self.conn.execute(
3018                    "INSERT INTO cfg_edges (file, function_qname, function_start_line, from_block, to_block, kind, exception_type) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
3019                    params![
3020                        data.file_path.clone(),
3021                        edge.function_qname.clone(),
3022                        edge.function_start_line as i64,
3023                        edge.from_block as i64,
3024                        edge.to_block as i64,
3025                        edge.kind.clone(),
3026                        edge.exception_type.clone(),
3027                    ],
3028                ).await?;
3029            }
3030            // Insert CFG defs
3031            for def in &data.cfg.defs {
3032                self.conn.execute(
3033                    "INSERT INTO cfg_defs (file, function_qname, function_start_line, block_id, name, byte_offset, line) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
3034                    params![
3035                        data.file_path.clone(),
3036                        def.function_qname.clone(),
3037                        def.function_start_line as i64,
3038                        def.block_id as i64,
3039                        def.name.clone(),
3040                        def.byte_offset as i64,
3041                        def.line as i64,
3042                    ],
3043                ).await?;
3044            }
3045            // Insert CFG uses
3046            for use_ in &data.cfg.uses {
3047                self.conn.execute(
3048                    "INSERT INTO cfg_uses (file, function_qname, function_start_line, block_id, name, byte_offset, line) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
3049                    params![
3050                        data.file_path.clone(),
3051                        use_.function_qname.clone(),
3052                        use_.function_start_line as i64,
3053                        use_.block_id as i64,
3054                        use_.name.clone(),
3055                        use_.byte_offset as i64,
3056                        use_.line as i64,
3057                    ],
3058                ).await?;
3059            }
3060            // Insert CFG effects
3061            for eff in &data.cfg.effects {
3062                self.conn.execute(
3063                    "INSERT INTO cfg_effects (file, function_qname, function_start_line, block_id, kind, byte_offset, line, label) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
3064                    params![
3065                        data.file_path.clone(),
3066                        eff.function_qname.clone(),
3067                        eff.function_start_line as i64,
3068                        eff.block_id as i64,
3069                        eff.kind.clone(),
3070                        eff.byte_offset as i64,
3071                        eff.line as i64,
3072                        eff.label.clone(),
3073                    ],
3074                ).await?;
3075            }
3076
3077            pb_insert.inc(1);
3078        }
3079
3080        pb_insert.finish_and_clear();
3081
3082        self.conn.execute("COMMIT", ()).await?;
3083
3084        // Resolve import module specifiers to root-relative file paths now that all
3085        // files are indexed. Must run after COMMIT so module_to_files() can query them.
3086        self.resolve_all_imports().await.unwrap_or_else(|e| {
3087            tracing::warn!("normalize-facts: resolve_all_imports error: {}", e);
3088            0
3089        });
3090        // Second pass: use per-language ModuleResolver for remaining unresolved imports.
3091        self.resolve_imports_via_module_resolver()
3092            .await
3093            .unwrap_or_else(|e| {
3094                tracing::warn!(
3095                    "normalize-facts: resolve_imports_via_module_resolver error: {}",
3096                    e
3097                );
3098                0
3099            });
3100        // Follow re-export chains so imports resolve to ultimate source files.
3101        self.trace_reexports().await.unwrap_or_else(|e| {
3102            tracing::warn!("normalize-facts: trace_reexports error: {}", e);
3103            0
3104        });
3105        // Resolve call targets using the now-populated import graph.
3106        self.resolve_all_calls().await.unwrap_or_else(|e| {
3107            tracing::warn!("normalize-facts: resolve_all_calls error: {}", e);
3108            0
3109        });
3110
3111        Ok(CallGraphStats {
3112            symbols: symbol_count,
3113            calls: call_count,
3114            imports: import_count,
3115        })
3116    }
3117
    /// Reindex specific files: delete old data and re-extract symbols/calls/imports.
    /// Expects to be called inside a transaction.
    ///
    /// `deleted_files` only have their rows purged; `changed_files` are purged and
    /// then re-parsed and re-inserted (symbols, calls, imports, type refs, CFG rows).
    /// Both slices hold root-relative paths. Returns counts of the symbols/calls/
    /// imports inserted for `changed_files`; deletions are not counted.
    async fn reindex_files(
        &self,
        deleted_files: &[String],
        changed_files: &[String],
    ) -> Result<CallGraphStats, libsql::Error> {
        // Remove data for deleted/modified files.
        // Every per-file table must be covered here, or stale rows survive a reindex.
        // Note `calls` keys on `caller_file` while all other tables key on `file`.
        for path in deleted_files.iter().chain(changed_files.iter()) {
            self.conn
                .execute("DELETE FROM symbols WHERE file = ?1", params![path.clone()])
                .await?;
            self.conn
                .execute(
                    "DELETE FROM calls WHERE caller_file = ?1",
                    params![path.clone()],
                )
                .await?;
            self.conn
                .execute("DELETE FROM imports WHERE file = ?1", params![path.clone()])
                .await?;
            self.conn
                .execute(
                    "DELETE FROM symbol_attributes WHERE file = ?1",
                    params![path.clone()],
                )
                .await?;
            self.conn
                .execute(
                    "DELETE FROM symbol_implements WHERE file = ?1",
                    params![path.clone()],
                )
                .await?;
            self.conn
                .execute(
                    "DELETE FROM type_refs WHERE file = ?1",
                    params![path.clone()],
                )
                .await?;
            self.conn
                .execute(
                    "DELETE FROM cfg_blocks WHERE file = ?1",
                    params![path.clone()],
                )
                .await?;
            self.conn
                .execute(
                    "DELETE FROM cfg_edges WHERE file = ?1",
                    params![path.clone()],
                )
                .await?;
            self.conn
                .execute(
                    "DELETE FROM cfg_defs WHERE file = ?1",
                    params![path.clone()],
                )
                .await?;
            self.conn
                .execute(
                    "DELETE FROM cfg_uses WHERE file = ?1",
                    params![path.clone()],
                )
                .await?;
            self.conn
                .execute(
                    "DELETE FROM cfg_effects WHERE file = ?1",
                    params![path.clone()],
                )
                .await?;
        }

        let mut parser = SymbolParser::new();
        let mut symbol_count = 0;
        let mut call_count = 0;
        let mut import_count = 0;

        // Parse changed files
        for file_path in changed_files {
            let full_path = self.root.join(file_path);
            // Unreadable files (deleted between walk and read, permission errors)
            // are silently skipped; their old rows were already deleted above.
            let bytes = match std::fs::read(&full_path) {
                Ok(b) => b,
                Err(_) => continue,
            };

            // Empty grammar name means "no tree-sitter support for this extension".
            let grammar = support_for_path(&full_path)
                .map(|s| s.grammar_name().to_string())
                .unwrap_or_default();
            let hash = blake3::hash(&bytes);

            // Try CA cache first (best-effort): keyed by content hash + extractor
            // version + grammar. Cache errors degrade to a normal parse, not a failure.
            let cached: Option<CachedFileData> = if !grammar.is_empty() {
                self.ca_cache.as_ref().and_then(|ca| {
                    ca.get::<CachedFileData>(hash.as_bytes(), EXTRACTOR_VERSION, &grammar)
                        .unwrap_or_else(|e| {
                            tracing::warn!("normalize-facts: CA cache get error: {}", e);
                            None
                        })
                })
            } else {
                None
            };

            // Either replay cached extraction results or parse from scratch.
            let (sym_data, call_data, imports, type_refs) = if let Some(c) = cached {
                (c.symbols, c.calls, c.imports, c.type_refs)
            } else {
                let content = String::from_utf8_lossy(&bytes).into_owned();

                // parse_file returns None when the grammar .so is unavailable.
                // Skip the file entirely — don't index it as empty.
                // The missing grammar is already recorded in `parsers::report_missing_grammar`
                // (called from `parse_file` -> `try_get_grammar`), so callers can summarise.
                let symbols = match parser.parse_file(&full_path, &content) {
                    Some(s) => s,
                    None => continue,
                };

                let mut sym_data = Vec::with_capacity(symbols.len());
                let mut call_data_local: Vec<CallEntry> = Vec::new();

                for sym in &symbols {
                    // Flatten the parser's symbol into the cache/DB representation.
                    sym_data.push(ParsedSymbol {
                        name: sym.name.clone(),
                        kind: sym.kind.as_str().to_string(),
                        start_line: sym.start_line,
                        end_line: sym.end_line,
                        parent: sym.parent.clone(),
                        visibility: sym.visibility.as_str().to_string(),
                        attributes: sym.attributes.clone(),
                        is_interface_impl: sym.is_interface_impl,
                        implements: sym.implements.clone(),
                        docstring: sym.docstring.clone(),
                    });
                    let kind = sym.kind.as_str();
                    // Only callable symbols contribute call-graph edges.
                    if kind == "function" || kind == "method" {
                        let calls = parser.find_callees_for_symbol(&full_path, &content, sym);
                        for (callee_name, line, qualifier, access) in calls {
                            call_data_local.push((
                                sym.name.clone(),
                                callee_name,
                                qualifier,
                                access,
                                line,
                            ));
                        }
                    }
                }

                let imports = parser.parse_imports(&full_path, &content);
                let type_refs = parser.find_type_refs(&full_path, &content);

                // Store in CA cache (best-effort).
                // Grammar availability is already guaranteed above (parse_file returned Some),
                // so empty results here are legitimate and safe to cache.
                // NOTE(review): this entry stores empty `type_methods` under the same
                // (hash, version, grammar) key the full-index path presumably uses —
                // if that path reads `type_methods` from cache, an incremental write
                // here could shadow a richer entry. TODO confirm cache keying/usage.
                if !grammar.is_empty()
                    && let Some(ca) = &self.ca_cache
                {
                    let cached_store = CachedFileData {
                        symbols: sym_data
                            .iter()
                            .map(|s| ParsedSymbol {
                                name: s.name.clone(),
                                kind: s.kind.clone(),
                                start_line: s.start_line,
                                end_line: s.end_line,
                                parent: s.parent.clone(),
                                visibility: s.visibility.clone(),
                                attributes: s.attributes.clone(),
                                is_interface_impl: s.is_interface_impl,
                                implements: s.implements.clone(),
                                docstring: s.docstring.clone(),
                            })
                            .collect(),
                        calls: call_data_local.clone(),
                        imports: imports.clone(),
                        type_methods: Vec::new(), // type_methods not extracted in incremental path
                        type_refs: type_refs.clone(),
                    };
                    if let Err(e) =
                        ca.put(hash.as_bytes(), EXTRACTOR_VERSION, &grammar, &cached_store)
                    {
                        tracing::warn!("normalize-facts: CA cache put error: {}", e);
                    }
                }

                (sym_data, call_data_local, imports, type_refs)
            };

            // Insert symbols (plus their attribute, docstring, and interface rows).
            for sym in &sym_data {
                self.conn.execute(
                    "INSERT INTO symbols (file, name, kind, start_line, end_line, parent, visibility, is_impl) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
                    params![file_path.clone(), sym.name.clone(), sym.kind.clone(), sym.start_line as i64, sym.end_line as i64, sym.parent.clone(), sym.visibility.clone(), sym.is_interface_impl as i64],
                ).await?;
                for attr in &sym.attributes {
                    self.conn.execute(
                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
                        params![file_path.clone(), sym.name.clone(), attr.clone()],
                    ).await?;
                }
                // Docstrings ride in the attributes table with a "doc:" prefix.
                if let Some(doc) = &sym.docstring {
                    self.conn.execute(
                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
                        params![file_path.clone(), sym.name.clone(), format!("doc:{doc}")],
                    ).await?;
                }
                for iface in &sym.implements {
                    self.conn.execute(
                        "INSERT INTO symbol_implements (file, name, interface) VALUES (?1, ?2, ?3)",
                        params![file_path.clone(), sym.name.clone(), iface.clone()],
                    ).await?;
                }
                symbol_count += 1;
            }

            // Insert calls (unresolved at this point; targets resolved by callers later).
            for (caller_symbol, callee_name, qualifier, access, line) in &call_data {
                self.conn.execute(
                    "INSERT INTO calls (caller_file, caller_symbol, callee_name, callee_qualifier, access, line) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
                    params![file_path.clone(), caller_symbol.clone(), callee_name.clone(), qualifier.clone(), access.clone(), *line as i64],
                ).await?;
                call_count += 1;
            }

            // Insert imports
            for imp in &imports {
                self.conn.execute(
                    "INSERT INTO imports (file, module, name, alias, line, is_reexport) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
                    params![file_path.clone(), imp.module.clone(), imp.name.clone(), imp.alias.clone(), imp.line as i64, imp.is_reexport as i64],
                ).await?;
                import_count += 1;
            }

            // Insert type references
            for tr in &type_refs {
                self.conn.execute(
                    "INSERT INTO type_refs (file, source_symbol, target_type, kind, line) VALUES (?1, ?2, ?3, ?4, ?5)",
                    params![file_path.clone(), tr.source_symbol.clone(), tr.target_type.clone(), tr.kind.as_str(), tr.line as i64],
                ).await?;
            }

            // Build and insert CFG data (best-effort).
            // NOTE(review): `full_path_for_cfg` and `grammar_for_cfg` recompute the
            // `full_path`/`grammar` values already in scope above — redundant work.
            let full_path_for_cfg = self.root.join(file_path);
            let grammar_for_cfg = support_for_path(&full_path_for_cfg)
                .map(|s| s.grammar_name().to_string())
                .unwrap_or_default();
            if !grammar_for_cfg.is_empty() {
                // Parse FlatSymbol list to get function symbols (needed for CFG building).
                // A fresh parse is required here because the cache-hit path yields
                // ParsedSymbol, not the FlatSymbol the CFG builder consumes.
                let flat_symbols: Vec<FlatSymbol> = {
                    let p = SymbolParser::new();
                    let content = String::from_utf8_lossy(&bytes).into_owned();
                    p.parse_file(&full_path_for_cfg, &content)
                        .unwrap_or_default()
                };
                let cfg_data = build_cfg_data_for_file(
                    &full_path_for_cfg,
                    &bytes,
                    &grammar_for_cfg,
                    &flat_symbols,
                );
                // OR IGNORE: block identity (file, qname, start_line, block_id) may
                // collide for duplicate function definitions.
                for blk in &cfg_data.blocks {
                    self.conn.execute(
                        "INSERT OR IGNORE INTO cfg_blocks (file, function_qname, function_start_line, block_id, kind, byte_start, byte_end, start_line, end_line) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
                        params![
                            file_path.clone(),
                            blk.function_qname.clone(),
                            blk.function_start_line as i64,
                            blk.block_id as i64,
                            blk.kind.clone(),
                            blk.byte_start as i64,
                            blk.byte_end as i64,
                            blk.start_line as i64,
                            blk.end_line as i64,
                        ],
                    ).await?;
                }
                for edge in &cfg_data.edges {
                    self.conn.execute(
                        "INSERT INTO cfg_edges (file, function_qname, function_start_line, from_block, to_block, kind, exception_type) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
                        params![
                            file_path.clone(),
                            edge.function_qname.clone(),
                            edge.function_start_line as i64,
                            edge.from_block as i64,
                            edge.to_block as i64,
                            edge.kind.clone(),
                            edge.exception_type.clone(),
                        ],
                    ).await?;
                }
                for def in &cfg_data.defs {
                    self.conn.execute(
                        "INSERT INTO cfg_defs (file, function_qname, function_start_line, block_id, name, byte_offset, line) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
                        params![
                            file_path.clone(),
                            def.function_qname.clone(),
                            def.function_start_line as i64,
                            def.block_id as i64,
                            def.name.clone(),
                            def.byte_offset as i64,
                            def.line as i64,
                        ],
                    ).await?;
                }
                for use_ in &cfg_data.uses {
                    self.conn.execute(
                        "INSERT INTO cfg_uses (file, function_qname, function_start_line, block_id, name, byte_offset, line) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
                        params![
                            file_path.clone(),
                            use_.function_qname.clone(),
                            use_.function_start_line as i64,
                            use_.block_id as i64,
                            use_.name.clone(),
                            use_.byte_offset as i64,
                            use_.line as i64,
                        ],
                    ).await?;
                }
                for eff in &cfg_data.effects {
                    self.conn.execute(
                        "INSERT INTO cfg_effects (file, function_qname, function_start_line, block_id, kind, byte_offset, line, label) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
                        params![
                            file_path.clone(),
                            eff.function_qname.clone(),
                            eff.function_start_line as i64,
                            eff.block_id as i64,
                            eff.kind.clone(),
                            eff.byte_offset as i64,
                            eff.line as i64,
                            eff.label.clone(),
                        ],
                    ).await?;
                }
            }
        }

        Ok(CallGraphStats {
            symbols: symbol_count,
            calls: call_count,
            imports: import_count,
        })
    }
3459
3460    /// Incrementally update call graph for changed files only.
3461    /// Much faster than full refresh when few files changed.
3462    pub async fn incremental_call_graph_refresh(
3463        &mut self,
3464    ) -> Result<CallGraphStats, libsql::Error> {
3465        let changed = self.get_changed_files().await?;
3466
3467        // Only process supported source and data files
3468        let changed_files: Vec<String> = changed
3469            .added
3470            .into_iter()
3471            .chain(changed.modified.into_iter())
3472            .filter(|f| is_source_file(f))
3473            .collect();
3474
3475        let deleted_source_files: Vec<String> = changed
3476            .deleted
3477            .into_iter()
3478            .filter(|f| is_source_file(f))
3479            .collect();
3480
3481        if changed_files.is_empty() && deleted_source_files.is_empty() {
3482            return Ok(CallGraphStats::default());
3483        }
3484
3485        self.conn.execute("BEGIN", ()).await?;
3486        let stats = self
3487            .reindex_files(&deleted_source_files, &changed_files)
3488            .await?;
3489        self.conn.execute("COMMIT", ()).await?;
3490
3491        // Resolve any newly inserted imports to root-relative file paths.
3492        self.resolve_all_imports().await.unwrap_or_else(|e| {
3493            tracing::warn!("normalize-facts: resolve_all_imports error: {}", e);
3494            0
3495        });
3496        // Second pass: use per-language ModuleResolver for remaining unresolved imports.
3497        self.resolve_imports_via_module_resolver()
3498            .await
3499            .unwrap_or_else(|e| {
3500                tracing::warn!(
3501                    "normalize-facts: resolve_imports_via_module_resolver error: {}",
3502                    e
3503                );
3504                0
3505            });
3506        // Follow re-export chains so imports resolve to ultimate source files.
3507        self.trace_reexports().await.unwrap_or_else(|e| {
3508            tracing::warn!("normalize-facts: trace_reexports error: {}", e);
3509            0
3510        });
3511        // Resolve call targets using the now-populated import graph.
3512        self.resolve_all_calls().await.unwrap_or_else(|e| {
3513            tracing::warn!("normalize-facts: resolve_all_calls error: {}", e);
3514            0
3515        });
3516
3517        Ok(stats)
3518    }
3519
3520    /// Update the index for a single file (used by LSP on save).
3521    /// Skips filesystem walk — directly reindexes the given path and resolves imports/calls.
3522    pub async fn update_file(&mut self, rel_path: &str) -> Result<CallGraphStats, libsql::Error> {
3523        let full_path = self.root.join(rel_path);
3524        let exists = full_path.exists();
3525
3526        // Update the files table mtime
3527        if exists {
3528            let metadata = std::fs::metadata(&full_path).ok();
3529            let mtime = metadata
3530                .and_then(|m| m.modified().ok())
3531                .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
3532                .map(|d| d.as_secs() as i64)
3533                .unwrap_or(0);
3534            self.conn
3535                .execute(
3536                    "UPDATE files SET mtime = ?1 WHERE path = ?2",
3537                    params![mtime, rel_path.to_string()],
3538                )
3539                .await?;
3540        }
3541
3542        if !is_source_file(rel_path) {
3543            return Ok(CallGraphStats::default());
3544        }
3545
3546        self.conn.execute("BEGIN", ()).await?;
3547        let stats = if exists {
3548            self.reindex_files(&[], &[rel_path.to_string()]).await?
3549        } else {
3550            self.reindex_files(&[rel_path.to_string()], &[]).await?
3551        };
3552        self.conn.execute("COMMIT", ()).await?;
3553
3554        self.resolve_all_imports().await.unwrap_or_else(|e| {
3555            tracing::warn!("normalize-facts: resolve_all_imports error: {}", e);
3556            0
3557        });
3558        self.resolve_imports_via_module_resolver()
3559            .await
3560            .unwrap_or_else(|e| {
3561                tracing::warn!(
3562                    "normalize-facts: resolve_imports_via_module_resolver error: {}",
3563                    e
3564                );
3565                0
3566            });
3567        self.trace_reexports().await.unwrap_or_else(|e| {
3568            tracing::warn!("normalize-facts: trace_reexports error: {}", e);
3569            0
3570        });
3571        self.resolve_all_calls().await.unwrap_or_else(|e| {
3572            tracing::warn!("normalize-facts: resolve_all_calls error: {}", e);
3573            0
3574        });
3575
3576        Ok(stats)
3577    }
3578
3579    /// Check if call graph needs refresh
3580    #[allow(dead_code)] // FileIndex API - used by daemon
3581    pub async fn needs_call_graph_refresh(&self) -> bool {
3582        self.call_graph_stats().await.unwrap_or_default().symbols == 0
3583    }
3584
3585    /// Find files matching a query using LIKE (fast pre-filter)
3586    /// Splits query by whitespace/separators and requires all parts to match
3587    /// Special case: queries starting with '.' are treated as extension patterns
3588    pub async fn find_like(&self, query: &str) -> Result<Vec<IndexedFile>, libsql::Error> {
3589        // Handle extension patterns (e.g., ".rs", ".py")
3590        if query.starts_with('.') && !query.contains('/') {
3591            let pattern = format!("%{}", query.to_lowercase());
3592            let mut rows = self.conn.query(
3593                "SELECT path, is_dir, mtime, lines FROM files WHERE LOWER(path) LIKE ?1 LIMIT 1000",
3594                params![pattern],
3595            ).await?;
3596            let mut files = Vec::new();
3597            while let Some(row) = rows.next().await? {
3598                files.push(IndexedFile {
3599                    path: row.get(0)?,
3600                    is_dir: row.get::<i64>(1)? != 0,
3601                    mtime: row.get(2)?,
3602                    lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
3603                });
3604            }
3605            return Ok(files);
3606        }
3607
3608        // Normalize query: split on whitespace and common separators (but not '.')
3609        let parts: Vec<&str> = query
3610            .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
3611            .filter(|s| !s.is_empty())
3612            .collect();
3613
3614        if parts.is_empty() {
3615            return Ok(Vec::new());
3616        }
3617
3618        // Cap to 4 parts before building SQL so ?1..?N matches the bound params count.
3619        let parts: Vec<&str> = parts.into_iter().take(4).collect();
3620
3621        // Build WHERE clause: LOWER(path) LIKE '%part1%' AND LOWER(path) LIKE '%part2%' ...
3622        let conditions: Vec<String> = (0..parts.len())
3623            .map(|i| format!("LOWER(path) LIKE ?{}", i + 1))
3624            .collect();
3625        let sql = format!(
3626            "SELECT path, is_dir, mtime, lines FROM files WHERE {} LIMIT 50",
3627            conditions.join(" AND ")
3628        );
3629
3630        let patterns: Vec<String> = parts
3631            .iter()
3632            .map(|p| format!("%{}%", p.to_lowercase()))
3633            .collect();
3634
3635        // For dynamic params, we need to build them differently
3636        // libsql doesn't support dynamic parameter slices the same way
3637        // Use a simpler approach for up to common cases
3638        let mut files = Vec::new();
3639        let mut rows = match patterns.len() {
3640            1 => self.conn.query(&sql, params![patterns[0].clone()]).await?,
3641            2 => {
3642                self.conn
3643                    .query(&sql, params![patterns[0].clone(), patterns[1].clone()])
3644                    .await?
3645            }
3646            3 => {
3647                self.conn
3648                    .query(
3649                        &sql,
3650                        params![
3651                            patterns[0].clone(),
3652                            patterns[1].clone(),
3653                            patterns[2].clone()
3654                        ],
3655                    )
3656                    .await?
3657            }
3658            4 => {
3659                self.conn
3660                    .query(
3661                        &sql,
3662                        params![
3663                            patterns[0].clone(),
3664                            patterns[1].clone(),
3665                            patterns[2].clone(),
3666                            patterns[3].clone()
3667                        ],
3668                    )
3669                    .await?
3670            }
3671            // parts is capped to 4 above, so len > 4 is unreachable
3672            _ => unreachable!("parts capped to 4"),
3673        };
3674
3675        while let Some(row) = rows.next().await? {
3676            files.push(IndexedFile {
3677                path: row.get(0)?,
3678                is_dir: row.get::<i64>(1)? != 0,
3679                mtime: row.get(2)?,
3680                lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
3681            });
3682        }
3683        Ok(files)
3684    }
3685
3686    /// Rebuild (or incrementally update) the co-change edges table from git history.
3687    ///
3688    /// When `since_commit` is `None`, performs a full rebuild: clears the table and walks
3689    /// all commits. When `since_commit` is `Some(sha)`, walks only commits after that SHA
3690    /// and merges counts into the existing table before re-applying the per-file fanout cap.
3691    ///
3692    /// Algorithm:
3693    /// 1. Walk commits via gix (pure-Rust, no `git` binary required).
3694    /// 2. For each commit: skip if it touches >50 files (large mechanical commit, no signal).
3695    /// 3. For each pair of source files in a commit: increment co-change count.
3696    /// 4. Apply filters: drop pairs with count < 2, cap each file to top 20 partners.
3697    /// 5. Upsert into `co_change_edges`.
3698    /// 6. Record HEAD SHA in `meta.co_change_last_commit` for incremental use.
3699    pub async fn rebuild_co_change_edges(
3700        &self,
3701        since_commit: Option<&str>,
3702    ) -> Result<usize, libsql::Error> {
3703        use std::collections::HashMap;
3704
3705        let root = &self.root;
3706
3707        // Open gix repository. If not a git repo, silently skip (not an error).
3708        let repo = match open_gix_repo(root) {
3709            Some(r) => r,
3710            None => {
3711                tracing::debug!("co-change: no git repository found at {:?}, skipping", root);
3712                return Ok(0);
3713            }
3714        };
3715
3716        let head_sha = match repo.head_id() {
3717            Ok(id) => id.to_string(),
3718            Err(_) => return Ok(0),
3719        };
3720
3721        // Walk commits, collecting per-commit file lists.
3722        let commit_files = walk_commits_for_co_change(&repo, since_commit);
3723
3724        if commit_files.is_empty() && since_commit.is_none() {
3725            // No history (or empty repo): ensure table is cleared and metadata stored.
3726            self.conn.execute("DELETE FROM co_change_edges", ()).await?;
3727            self.conn
3728                .execute(
3729                    "INSERT OR REPLACE INTO meta (key, value) VALUES ('co_change_last_commit', ?1)",
3730                    params![head_sha],
3731                )
3732                .await?;
3733            return Ok(0);
3734        }
3735
3736        // For incremental: load existing counts from DB, merge new counts, re-apply cap.
3737        // For full: start fresh.
3738        let mut pair_counts: HashMap<(String, String), usize> = HashMap::new();
3739
3740        if since_commit.is_some() {
3741            // Load existing edges into the map so we can merge.
3742            let mut rows = self
3743                .conn
3744                .query("SELECT file_a, file_b, count FROM co_change_edges", ())
3745                .await?;
3746            while let Some(row) = rows.next().await? {
3747                let a: String = row.get(0)?;
3748                let b: String = row.get(1)?;
3749                let c: i64 = row.get(2)?;
3750                pair_counts.insert((a, b), c as usize);
3751            }
3752        }
3753
3754        // Accumulate new commit data.
3755        for files in &commit_files {
3756            // Files are already filtered to source files only.
3757            if files.len() > 50 || files.len() < 2 {
3758                continue;
3759            }
3760            let mut sorted = files.clone();
3761            sorted.sort_unstable();
3762            sorted.dedup();
3763            for i in 0..sorted.len() {
3764                for j in (i + 1)..sorted.len() {
3765                    let key = (sorted[i].clone(), sorted[j].clone());
3766                    *pair_counts.entry(key).or_default() += 1;
3767                }
3768            }
3769        }
3770
3771        // Apply filters: drop count < 2, apply per-file top-20 fanout cap.
3772        pair_counts.retain(|_, v| *v >= 2);
3773        let pair_counts = apply_fanout_cap(pair_counts, 20);
3774
3775        // Write to DB.
3776        if since_commit.is_some() {
3777            // Full replace: clear and reinsert (we have the full merged set).
3778            self.conn.execute("DELETE FROM co_change_edges", ()).await?;
3779        } else {
3780            self.conn.execute("DELETE FROM co_change_edges", ()).await?;
3781        }
3782
3783        let mut inserted = 0usize;
3784        for ((a, b), count) in &pair_counts {
3785            self.conn.execute(
3786                "INSERT OR REPLACE INTO co_change_edges (file_a, file_b, count) VALUES (?1, ?2, ?3)",
3787                params![a.clone(), b.clone(), *count as i64],
3788            ).await?;
3789            inserted += 1;
3790        }
3791
3792        // Record the HEAD SHA so the next incremental run knows where to resume.
3793        self.conn
3794            .execute(
3795                "INSERT OR REPLACE INTO meta (key, value) VALUES ('co_change_last_commit', ?1)",
3796                params![head_sha],
3797            )
3798            .await?;
3799
3800        Ok(inserted)
3801    }
3802
3803    /// Query co-change edges from the index.
3804    ///
3805    /// Returns pairs `(file_a, file_b, count)` where count >= `min_count`.
3806    /// Returns `Ok(None)` if the `co_change_edges` table is empty (not yet built),
3807    /// so callers can fall back to the git walk.
3808    pub async fn query_co_change_edges(
3809        &self,
3810        min_count: usize,
3811    ) -> Result<Option<Vec<(String, String, usize)>>, libsql::Error> {
3812        // Check if the table has any data.
3813        let mut check = self
3814            .conn
3815            .query("SELECT COUNT(*) FROM co_change_edges", ())
3816            .await?;
3817        let total: i64 = if let Some(row) = check.next().await? {
3818            row.get(0)?
3819        } else {
3820            0
3821        };
3822        if total == 0 {
3823            return Ok(None);
3824        }
3825
3826        let mut rows = self
3827            .conn
3828            .query(
3829                "SELECT file_a, file_b, count FROM co_change_edges WHERE count >= ?1",
3830                params![min_count as i64],
3831            )
3832            .await?;
3833
3834        let mut result = Vec::new();
3835        while let Some(row) = rows.next().await? {
3836            let a: String = row.get(0)?;
3837            let b: String = row.get(1)?;
3838            let c: i64 = row.get(2)?;
3839            result.push((a, b, c as usize));
3840        }
3841        Ok(Some(result))
3842    }
3843
3844    /// Return the stored HEAD SHA from the last co-change rebuild, if any.
3845    pub async fn co_change_last_commit(&self) -> Option<String> {
3846        let mut rows = self
3847            .conn
3848            .query(
3849                "SELECT value FROM meta WHERE key = 'co_change_last_commit'",
3850                (),
3851            )
3852            .await
3853            .ok()?;
3854        let row = rows.next().await.ok()??;
3855        row.get(0).ok()
3856    }
3857
3858    // -------------------------------------------------------------------------
3859    // Diagnostics cache (daemon use only)
3860    // -------------------------------------------------------------------------
3861
3862    /// Persist rkyv-serialized diagnostics blob for one engine ("syntax", "fact", "native", "all").
3863    /// Replaces any previous value for that engine.
3864    ///
3865    /// `config_hash` is stamped on the row so callers can detect blobs produced
3866    /// under a different config (cross-daemon-restart staleness). See
3867    /// `load_diagnostics_blob` for the matching read side.
3868    pub async fn save_diagnostics_blob(
3869        &self,
3870        engine: &str,
3871        blob: &[u8],
3872        config_hash: &str,
3873    ) -> Result<(), libsql::Error> {
3874        let now = std::time::SystemTime::now()
3875            .duration_since(std::time::UNIX_EPOCH)
3876            .unwrap_or_default()
3877            .as_secs() as i64;
3878        self.conn
3879            .execute(
3880                "INSERT OR REPLACE INTO daemon_diagnostics (engine, issues_blob, config_hash, updated_at)
3881                 VALUES (?1, ?2, ?3, ?4)",
3882                params![engine.to_string(), blob.to_vec(), config_hash.to_string(), now],
3883            )
3884            .await?;
3885        Ok(())
3886    }
3887
3888    /// Load rkyv-serialized diagnostics blob for one engine.
3889    ///
3890    /// Returns `None` if no row exists *or* the row's `config_hash` does not
3891    /// match `expected_hash`. The mismatch case is treated as a cache miss so
3892    /// the caller will reprime under the current config rather than serving a
3893    /// blob from a previous daemon session.
3894    pub async fn load_diagnostics_blob(
3895        &self,
3896        engine: &str,
3897        expected_hash: &str,
3898    ) -> Result<Option<Vec<u8>>, libsql::Error> {
3899        let mut rows = self
3900            .conn
3901            .query(
3902                "SELECT issues_blob, config_hash FROM daemon_diagnostics WHERE engine = ?1",
3903                params![engine.to_string()],
3904            )
3905            .await?;
3906        if let Some(row) = rows.next().await? {
3907            let blob: Vec<u8> = row.get(0)?;
3908            let stored_hash: String = row.get(1)?;
3909            if stored_hash == expected_hash {
3910                Ok(Some(blob))
3911            } else {
3912                Ok(None)
3913            }
3914        } else {
3915            Ok(None)
3916        }
3917    }
3918
    /// Replace per-file diagnostics blobs in a single transaction.
    ///
    /// `upserts`: `(relative_path, rkyv_blob)` — files that have issues.
    /// `deletes`: relative paths that became clean (had a row, now don't).
    ///
    /// All upserts and deletes commit atomically so readers never see a
    /// partially-updated state.
    ///
    /// # Errors
    ///
    /// Propagates the first failing statement's error after a best-effort
    /// `ROLLBACK` of the open transaction.
    pub async fn save_diagnostics_per_file(
        &self,
        upserts: &[(String, Vec<u8>)],
        deletes: &[String],
        config_hash: &str,
    ) -> Result<(), libsql::Error> {
        // One shared wall-clock timestamp (seconds since epoch) for every row
        // written by this batch; a pre-epoch clock falls back to 0.
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs() as i64;
        self.conn.execute("BEGIN", ()).await?;
        // All statements run inside one async block so any failure
        // short-circuits via `?` to `result`, letting the match below decide
        // COMMIT vs ROLLBACK instead of leaving the transaction open.
        let result: Result<(), libsql::Error> = async {
            for (path, blob) in upserts {
                self.conn
                    .execute(
                        "INSERT OR REPLACE INTO daemon_diagnostics_per_file
                         (path, issues_blob, config_hash, updated_at) VALUES (?1, ?2, ?3, ?4)",
                        params![path.clone(), blob.clone(), config_hash.to_string(), now],
                    )
                    .await?;
            }
            for path in deletes {
                self.conn
                    .execute(
                        "DELETE FROM daemon_diagnostics_per_file WHERE path = ?1",
                        params![path.clone()],
                    )
                    .await?;
            }
            Ok(())
        }
        .await;
        match result {
            Ok(()) => {
                self.conn.execute("COMMIT", ()).await?;
                Ok(())
            }
            Err(e) => {
                // ROLLBACK failure is deliberately ignored: the original
                // statement error is the one the caller needs to see.
                let _ = self.conn.execute("ROLLBACK", ()).await;
                Err(e)
            }
        }
    }
3969
3970    /// Load the rkyv blob for one file. `None` = no row (file is clean) or the
3971    /// row's `config_hash` doesn't match `expected_hash` (stale across config
3972    /// change).
3973    pub async fn load_diagnostics_for_file(
3974        &self,
3975        path: &str,
3976        expected_hash: &str,
3977    ) -> Result<Option<Vec<u8>>, libsql::Error> {
3978        let mut rows = self
3979            .conn
3980            .query(
3981                "SELECT issues_blob, config_hash FROM daemon_diagnostics_per_file WHERE path = ?1",
3982                params![path.to_string()],
3983            )
3984            .await?;
3985        if let Some(row) = rows.next().await? {
3986            let blob: Vec<u8> = row.get(0)?;
3987            let stored_hash: String = row.get(1)?;
3988            if stored_hash == expected_hash {
3989                Ok(Some(blob))
3990            } else {
3991                Ok(None)
3992            }
3993        } else {
3994            Ok(None)
3995        }
3996    }
3997
3998    /// Load blobs for many files. Skips files with no row or whose stored
3999    /// `config_hash` doesn't match `expected_hash`.
4000    /// Returns `(path, blob)` pairs in arbitrary order.
4001    pub async fn load_diagnostics_for_files(
4002        &self,
4003        paths: &[String],
4004        expected_hash: &str,
4005    ) -> Result<Vec<(String, Vec<u8>)>, libsql::Error> {
4006        let mut out = Vec::new();
4007        for path in paths {
4008            let mut rows = self
4009                .conn
4010                .query(
4011                    "SELECT path, issues_blob, config_hash FROM daemon_diagnostics_per_file WHERE path = ?1",
4012                    params![path.clone()],
4013                )
4014                .await?;
4015            if let Some(row) = rows.next().await? {
4016                let p: String = row.get(0)?;
4017                let b: Vec<u8> = row.get(1)?;
4018                let stored_hash: String = row.get(2)?;
4019                if stored_hash == expected_hash {
4020                    out.push((p, b));
4021                }
4022            }
4023        }
4024        Ok(out)
4025    }
4026
4027    /// Drop every cached diagnostic row (both per-engine blobs and the
4028    /// per-file table). Used by the daemon when `.normalize/config.toml` or a
4029    /// rule-definition file changes — the cached blobs reflect the *previous*
4030    /// config, so they must be cleared before a full reprime to prevent stale
4031    /// `RunRules` results being served between the config change and the
4032    /// reprime completing.
4033    pub async fn clear_all_diagnostics(&self) -> Result<(), libsql::Error> {
4034        self.conn
4035            .execute("DELETE FROM daemon_diagnostics", ())
4036            .await?;
4037        self.conn
4038            .execute("DELETE FROM daemon_diagnostics_per_file", ())
4039            .await?;
4040        Ok(())
4041    }
4042
4043    /// Return all paths that currently have a per-file diagnostics row.
4044    /// Used by the daemon refresh diff to detect files that became clean.
4045    pub async fn list_diagnostic_paths(&self) -> Result<Vec<String>, libsql::Error> {
4046        let mut rows = self
4047            .conn
4048            .query("SELECT path FROM daemon_diagnostics_per_file", ())
4049            .await?;
4050        let mut out = Vec::new();
4051        while let Some(row) = rows.next().await? {
4052            out.push(row.get(0)?);
4053        }
4054        Ok(out)
4055    }
4056}
4057
4058// =============================================================================
4059// CFG building helpers
4060// =============================================================================
4061
/// Build CFG data (blocks, edges, defs, uses) for all function/method symbols in a file.
///
/// Returns four vecs of rows ready for DB insertion. Errors from individual function builds
/// are silently ignored (best-effort) so a broken CFG query doesn't abort the whole index.
///
/// Any missing prerequisite — no CFG query for the grammar, unloadable
/// tree-sitter language, no tags query, unparseable source, or no
/// function/method symbols — yields an empty `CfgData` rather than an error.
fn build_cfg_data_for_file(
    full_path: &Path,
    source_bytes: &[u8],
    grammar_name: &str,
    symbols: &[FlatSymbol],
) -> CfgData {
    // Accumulators shared by the early-return macro and the main loop below.
    let mut all_blocks: Vec<CfgBlockRow> = Vec::new();
    let mut all_edges: Vec<CfgEdgeRow> = Vec::new();
    let mut all_defs: Vec<CfgDefRow> = Vec::new();
    let mut all_uses: Vec<CfgUseRow> = Vec::new();
    let mut all_effects: Vec<CfgEffectRow> = Vec::new();

    // Helper macro to construct an early-return CfgData.
    // (A macro rather than a closure so it can move the accumulators out.)
    macro_rules! empty_cfg_data {
        () => {
            CfgData {
                blocks: all_blocks,
                edges: all_edges,
                defs: all_defs,
                uses: all_uses,
                effects: all_effects,
            }
        };
    }

    // Only proceed if the language has a CFG query.
    let loader = normalize_languages::parsers::grammar_loader();
    let cfg_query_src = match loader.get_cfg(grammar_name) {
        Some(q) => q,
        None => return empty_cfg_data!(),
    };
    let ts_language = match loader.get(grammar_name) {
        Ok(l) => l,
        Err(_) => return empty_cfg_data!(),
    };
    // The tags query is needed below to find function-definition byte ranges.
    let tags_query_src = match loader.get_tags(grammar_name) {
        Some(q) => q,
        None => return empty_cfg_data!(),
    };

    // Parse the file.
    let mut parser = tree_sitter::Parser::new();
    if parser.set_language(&ts_language).is_err() {
        return empty_cfg_data!();
    }
    let tree = match parser.parse(source_bytes, None) {
        Some(t) => t,
        None => return empty_cfg_data!(),
    };

    // Build a set of (name, start_line) for function/method symbols.
    let func_symbols: Vec<(&FlatSymbol, u32)> = symbols
        .iter()
        .filter_map(|s| {
            let kind = s.kind.as_str();
            if kind == "function" || kind == "method" {
                Some((s, s.start_line as u32))
            } else {
                None
            }
        })
        .collect();

    if func_symbols.is_empty() {
        return empty_cfg_data!();
    }

    // Find function body byte ranges using the tags query.
    let tags_query = match tree_sitter::Query::new(&ts_language, &tags_query_src) {
        Ok(q) => q,
        Err(_) => return empty_cfg_data!(),
    };
    let capture_names = tags_query.capture_names().to_vec();
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches_iter = cursor.matches(&tags_query, tree.root_node(), source_bytes);

    // Collect (func_name, def_start, def_end, start_line).
    struct FuncCandidate {
        name: String,
        start_byte: usize,
        end_byte: usize,
        start_line: u32, // 1-based (tree-sitter rows are 0-based; +1 below)
    }
    let mut candidates: Vec<FuncCandidate> = Vec::new();
    use streaming_iterator::StreamingIterator as _;
    while let Some(mat) = matches_iter.next() {
        for cap in mat.captures {
            let cap_name = capture_names[cap.index as usize];
            // Accept function/method name-definition captures plus the
            // grammar-agnostic "name.definition" fallback.
            if cap_name.starts_with("name.definition.function")
                || cap_name.starts_with("name.definition.method")
                || cap_name == "name.definition"
            {
                let func_name = cap
                    .node
                    .utf8_text(source_bytes)
                    .unwrap_or("<unknown>")
                    .to_string();
                // The captured node is just the *name*; its parent is the whole
                // definition node whose byte range we want.
                let def_node = cap.node.parent().unwrap_or(cap.node);
                candidates.push(FuncCandidate {
                    name: func_name,
                    start_byte: def_node.start_byte(),
                    end_byte: def_node.end_byte(),
                    start_line: def_node.start_position().row as u32 + 1,
                });
            }
        }
    }
    drop(matches_iter);

    // For each function symbol, find matching candidate by name + start_line proximity.
    for (sym, sym_start_line) in &func_symbols {
        // Find the candidate whose name matches and start_line is close.
        // Proximity disambiguates same-named functions (e.g. overloads,
        // methods on different classes) within one file.
        let candidate = candidates
            .iter()
            .filter(|c| c.name == sym.name)
            .min_by_key(|c| (*sym_start_line as i64 - c.start_line as i64).unsigned_abs());
        let candidate = match candidate {
            Some(c) => c,
            None => continue,
        };

        let body_range = candidate.start_byte..candidate.end_byte;
        let function_id = normalize_cfg::FunctionId {
            file: full_path.to_string_lossy().into_owned(),
            qualified_name: sym.name.clone(),
            start_line: candidate.start_line,
        };

        // Best-effort: a failed CFG build for one function skips it without
        // aborting the rest of the file.
        let cfg = match normalize_cfg::builder::build(
            &tree,
            &cfg_query_src,
            source_bytes,
            function_id,
            body_range,
        ) {
            Ok(c) => c,
            Err(_) => continue,
        };

        // (qname, fsl) key every row back to this function for DB joins.
        let qname = &sym.name;
        let fsl = candidate.start_line;

        // Flatten blocks and their nested defs/uses/effects into row form.
        for blk in &cfg.blocks {
            all_blocks.push(CfgBlockRow {
                function_qname: qname.clone(),
                function_start_line: fsl,
                block_id: blk.id.0,
                kind: format!("{:?}", blk.kind).to_lowercase(),
                byte_start: blk.byte_range.start,
                byte_end: blk.byte_range.end,
                start_line: blk.start_line,
                end_line: blk.end_line,
            });
            for def in &blk.defs {
                all_defs.push(CfgDefRow {
                    function_qname: qname.clone(),
                    function_start_line: fsl,
                    block_id: blk.id.0,
                    name: def.name.clone(),
                    byte_offset: def.byte_offset,
                    line: def.line,
                });
            }
            for use_ in &blk.uses {
                all_uses.push(CfgUseRow {
                    function_qname: qname.clone(),
                    function_start_line: fsl,
                    block_id: blk.id.0,
                    name: use_.name.clone(),
                    byte_offset: use_.byte_offset,
                    line: use_.line,
                });
            }
            for eff in &blk.effects {
                all_effects.push(CfgEffectRow {
                    function_qname: qname.clone(),
                    function_start_line: fsl,
                    block_id: blk.id.0,
                    kind: format!("{:?}", eff.kind).to_lowercase(),
                    byte_offset: eff.byte_offset,
                    line: eff.line,
                    label: eff.label.clone(),
                });
            }
        }
        for edge in &cfg.edges {
            all_edges.push(CfgEdgeRow {
                function_qname: qname.clone(),
                function_start_line: fsl,
                from_block: edge.from.0,
                to_block: edge.to.0,
                kind: format!("{:?}", edge.kind).to_lowercase(),
                exception_type: edge.exception_type.clone(),
            });
        }
    }

    CfgData {
        blocks: all_blocks,
        edges: all_edges,
        defs: all_defs,
        uses: all_uses,
        effects: all_effects,
    }
}
4271
4272// =============================================================================
4273// Co-change helpers (not on FileIndex — free functions to keep impl clean)
4274// =============================================================================
4275
4276/// Open a gix repository at or containing `root`.
4277fn open_gix_repo(root: &std::path::Path) -> Option<gix::Repository> {
4278    gix::discover(root)
4279        .ok()
4280        .map(|r| r.into_sync().to_thread_local())
4281}
4282
/// Walk commits via gix, returning per-commit lists of *source* files changed.
///
/// If `since_commit` is `Some(sha)`, only commits after (exclusive) that SHA are returned.
/// Commits are yielded oldest-first from the HEAD ancestry.
fn walk_commits_for_co_change(
    repo: &gix::Repository,
    since_commit: Option<&str>,
) -> Vec<Vec<String>> {
    // No readable HEAD (e.g. unborn branch) or no ancestry walk: no history.
    let head_id = match repo.head_id() {
        Ok(id) => id,
        Err(_) => return Vec::new(),
    };
    let walk = match head_id.ancestors().all() {
        Ok(w) => w,
        Err(_) => return Vec::new(),
    };

    // If since_commit is specified, resolve it to an ObjectId for fast comparison.
    // An unparseable SHA leaves stop_id = None, which degrades to a full walk.
    let stop_id: Option<gix::hash::ObjectId> = since_commit.and_then(|sha| sha.parse().ok());

    let mut result = Vec::new();

    for info in walk {
        // Best-effort: skip commits that fail to load rather than aborting the walk.
        let Ok(info) = info else { continue };
        let commit_id = info.id();

        // Stop when we hit the commit we already processed.
        if let Some(ref stop) = stop_id
            && commit_id == *stop
        {
            break;
        }

        let Ok(commit) = info.object() else { continue };
        let Ok(tree) = commit.tree() else { continue };

        // First parent only (merge side-branches are not re-counted). Root
        // commits have no parent tree, so they diff against the empty tree.
        let parent_tree = info
            .parent_ids()
            .next()
            .and_then(|pid| pid.object().ok())
            .and_then(|obj| obj.into_commit().tree().ok());

        let changes = match repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None) {
            Ok(c) => c,
            Err(_) => continue,
        };

        // Reduce each change to the affected repo-relative path; rewrites use
        // the source path.
        let files: Vec<String> = changes
            .into_iter()
            .filter_map(|change| {
                use gix::object::tree::diff::ChangeDetached;
                let location = match change {
                    ChangeDetached::Addition { location, .. } => location,
                    ChangeDetached::Deletion { location, .. } => location,
                    ChangeDetached::Modification { location, .. } => location,
                    ChangeDetached::Rewrite {
                        source_location, ..
                    } => source_location,
                };
                // Git paths are bytes; lossy UTF-8 conversion is acceptable here.
                let path_str = String::from_utf8_lossy(&location).into_owned();
                // Only include source files (those with a supported language extension).
                if is_source_file(&path_str) {
                    Some(path_str)
                } else {
                    None
                }
            })
            .collect();

        // A commit needs at least two source files to form any co-change pair.
        if files.len() >= 2 {
            result.push(files);
        }
    }

    result
}
4359
/// Apply a per-file fanout cap: for each file, keep only its top `cap` partners by count.
///
/// A pair survives if *either* endpoint ranks the other among its top `cap`
/// partners, so a hub file may still appear in more than `cap` surviving
/// pairs when many other files rank it highly.
///
/// Ties on count are broken by partner path (ascending) so repeated rebuilds
/// over identical history produce the same edge set regardless of hash-map
/// iteration order. (Previously ties were truncated in arbitrary order.)
///
/// Returns a new HashMap with entries pruned to satisfy the cap.
fn apply_fanout_cap(
    pair_counts: std::collections::HashMap<(String, String), usize>,
    cap: usize,
) -> std::collections::HashMap<(String, String), usize> {
    use std::collections::{HashMap, HashSet};

    // Build per-file partner lists; each pair contributes to both endpoints.
    let mut file_partners: HashMap<String, Vec<(String, usize)>> = HashMap::new();
    for ((a, b), count) in &pair_counts {
        file_partners
            .entry(a.clone())
            .or_default()
            .push((b.clone(), *count));
        file_partners
            .entry(b.clone())
            .or_default()
            .push((a.clone(), *count));
    }

    // For each file, keep only the top `cap` partners. Sort by descending
    // count, then ascending partner path, so truncation is deterministic.
    let mut allowed: HashSet<(String, String)> = HashSet::new();
    for (file, mut partners) in file_partners {
        partners.sort_unstable_by(|x, y| y.1.cmp(&x.1).then_with(|| x.0.cmp(&y.0)));
        partners.truncate(cap);
        for (partner, _) in partners {
            // Canonical key: lexicographically smaller goes first.
            let key = if file <= partner {
                (file.clone(), partner)
            } else {
                (partner, file.clone())
            };
            allowed.insert(key);
        }
    }

    pair_counts
        .into_iter()
        .filter(|(k, _)| allowed.contains(k))
        .collect()
}
4403
4404#[cfg(test)]
4405mod tests {
4406    use super::*;
4407    use std::fs;
4408    use tempfile::tempdir;
4409
4410    #[tokio::test]
4411    async fn test_index_creation() {
4412        let dir = tempdir().unwrap();
4413        fs::create_dir_all(dir.path().join("src/myapp")).unwrap();
4414        fs::write(dir.path().join("src/myapp/cli.py"), "").unwrap();
4415        fs::write(dir.path().join("src/myapp/dwim.py"), "").unwrap();
4416
4417        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
4418            .await
4419            .unwrap();
4420        assert!(index.needs_refresh().await);
4421
4422        let count = index.refresh().await.unwrap();
4423        assert!(count >= 2);
4424
4425        // Should find files by name
4426        let matches = index.find_by_name("cli.py").await.unwrap();
4427        assert_eq!(matches.len(), 1);
4428        assert!(matches[0].path.ends_with("cli.py"));
4429    }
4430
4431    #[tokio::test]
4432    async fn test_find_by_stem() {
4433        let dir = tempdir().unwrap();
4434        fs::create_dir_all(dir.path().join("src")).unwrap();
4435        fs::write(dir.path().join("src/test.py"), "").unwrap();
4436        fs::write(dir.path().join("src/test.rs"), "").unwrap();
4437
4438        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
4439            .await
4440            .unwrap();
4441        index.refresh().await.unwrap();
4442
4443        let matches = index.find_by_stem("test").await.unwrap();
4444        assert_eq!(matches.len(), 2);
4445    }
4446
    /// Wildcard (`from X import *`) resolution must search every wildcard-
    /// imported module for the referenced name.
    #[tokio::test]
    async fn test_wildcard_import_resolution() {
        let dir = tempdir().unwrap();
        fs::create_dir_all(dir.path().join("src/mylib")).unwrap();
        // Module that exports MyClass
        fs::write(
            dir.path().join("src/mylib/exports.py"),
            "class MyClass: pass",
        )
        .unwrap();
        // Module that exports OtherThing
        fs::write(
            dir.path().join("src/mylib/other.py"),
            "def OtherThing(): pass",
        )
        .unwrap();
        // Consumer with wildcard imports
        fs::write(
            dir.path().join("src/consumer.py"),
            "from mylib.exports import *\nfrom mylib.other import *\nMyClass()",
        )
        .unwrap();

        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
            .await
            .unwrap();
        index.refresh().await.unwrap();
        // The call-graph refresh is the pass that resolves imports.
        index.refresh_call_graph().await.unwrap();

        // Now resolve MyClass - should find it in mylib.exports
        let result = index
            .resolve_import("src/consumer.py", "MyClass")
            .await
            .unwrap();
        assert!(result.is_some(), "Should resolve MyClass");
        let (module, name) = result.unwrap();
        assert_eq!(module, "mylib.exports");
        assert_eq!(name, "MyClass");

        // Resolve OtherThing - should find it in mylib.other
        // (proves resolution looked beyond the first wildcard import)
        let result = index
            .resolve_import("src/consumer.py", "OtherThing")
            .await
            .unwrap();
        assert!(result.is_some(), "Should resolve OtherThing");
        let (module, name) = result.unwrap();
        assert_eq!(module, "mylib.other");
        assert_eq!(name, "OtherThing");
    }
4496
    /// `self.method()` calls inside a class must register as call edges, and
    /// both the bare method name and the `Class.method` qualified form must be
    /// queryable via `find_callers`.
    #[tokio::test]
    async fn test_method_call_resolution() {
        let dir = tempdir().unwrap();
        fs::create_dir_all(dir.path().join("src")).unwrap();
        // A class with methods that call each other
        let class_code = r#"
class MyClass:
    def method_a(self):
        self.method_b()

    def method_b(self):
        pass

    def method_c(self):
        self.method_b()
"#;
        fs::write(dir.path().join("src/myclass.py"), class_code).unwrap();

        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
            .await
            .unwrap();
        index.refresh().await.unwrap();
        index.refresh_call_graph().await.unwrap();

        // Find callers of method_b - should include method_a and method_c
        let callers = index
            .find_callers("method_b", "src/myclass.py")
            .await
            .unwrap();
        assert!(!callers.is_empty(), "Should find callers of method_b");

        // Second tuple field is the caller's name; the other fields are unused here.
        let caller_names: Vec<&str> = callers
            .iter()
            .map(|(_, name, _, _)| name.as_str())
            .collect();
        assert!(
            caller_names.contains(&"method_a"),
            "method_a should call method_b"
        );
        assert!(
            caller_names.contains(&"method_c"),
            "method_c should call method_b"
        );

        // Find callers of MyClass.method_b - more specific
        let callers = index
            .find_callers("MyClass.method_b", "src/myclass.py")
            .await
            .unwrap();
        assert!(
            !callers.is_empty(),
            "Should find callers of MyClass.method_b"
        );
    }
4551
4552    /// Regression test: find_callers must not return callers of a same-named function
4553    /// in a different module. Two modules define `helper()`, and `main.py` imports only
4554    /// one of them. `find_callers("helper", "src/utils_a.py")` must not include calls
4555    /// that target `src/utils_b.py`'s `helper()`.
4556    #[tokio::test]
4557    async fn test_find_callers_cross_module_disambiguation() {
4558        let dir = tempdir().unwrap();
4559        fs::create_dir_all(dir.path().join("src")).unwrap();
4560
4561        // Two modules with the same function name
4562        fs::write(
4563            dir.path().join("src/utils_a.py"),
4564            "def helper():\n    return 'A'\n",
4565        )
4566        .unwrap();
4567        fs::write(
4568            dir.path().join("src/utils_b.py"),
4569            "def helper():\n    return 'B'\n",
4570        )
4571        .unwrap();
4572
4573        // caller_a.py imports from utils_a and calls helper()
4574        fs::write(
4575            dir.path().join("src/caller_a.py"),
4576            "from utils_a import helper\n\ndef do_a():\n    helper()\n",
4577        )
4578        .unwrap();
4579
4580        // caller_b.py imports from utils_b and calls helper()
4581        fs::write(
4582            dir.path().join("src/caller_b.py"),
4583            "from utils_b import helper\n\ndef do_b():\n    helper()\n",
4584        )
4585        .unwrap();
4586
4587        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
4588            .await
4589            .unwrap();
4590        index.refresh().await.unwrap();
4591        index.refresh_call_graph().await.unwrap();
4592
4593        // Check whether imports got resolved (depends on normalize-local-deps Python support)
4594        let mut rows = index
4595            .connection()
4596            .query(
4597                "SELECT file, resolved_file FROM imports WHERE name = 'helper' ORDER BY file",
4598                (),
4599            )
4600            .await
4601            .unwrap();
4602        let mut import_resolution: Vec<(String, Option<String>)> = Vec::new();
4603        while let Some(row) = rows.next().await.unwrap() {
4604            import_resolution.push((row.get(0).unwrap(), row.get(1).unwrap()));
4605        }
4606
4607        // Check whether calls got resolved
4608        let mut rows = index
4609            .connection()
4610            .query(
4611                "SELECT caller_file, callee_name, callee_resolved_file FROM calls WHERE callee_name = 'helper' ORDER BY caller_file",
4612                (),
4613            )
4614            .await
4615            .unwrap();
4616        let mut call_resolution: Vec<(String, String, Option<String>)> = Vec::new();
4617        while let Some(row) = rows.next().await.unwrap() {
4618            call_resolution.push((
4619                row.get(0).unwrap(),
4620                row.get(1).unwrap(),
4621                row.get(2).unwrap(),
4622            ));
4623        }
4624
4625        // Ask for callers of utils_a's helper
4626        let callers = index
4627            .find_callers("helper", "src/utils_a.py")
4628            .await
4629            .unwrap();
4630        let caller_files: Vec<&str> = callers.iter().map(|(f, _, _, _)| f.as_str()).collect();
4631
4632        // When imports are resolved, disambiguation is precise — only the correct
4633        // caller appears. When unresolved (no LocalDeps for test setup), both
4634        // callers may appear via the NULL fallback. Either way caller_a must appear.
4635        assert!(
4636            caller_files.contains(&"src/caller_a.py"),
4637            "caller_a.py calls helper() (imports utils_a), must be a caller. Got: {:?}\nimports: {:?}\ncalls: {:?}",
4638            caller_files,
4639            import_resolution,
4640            call_resolution,
4641        );
4642
4643        let imports_resolved = import_resolution
4644            .iter()
4645            .any(|(_, r)| r.as_deref() == Some("src/utils_a.py"));
4646        if imports_resolved {
4647            assert!(
4648                !caller_files.contains(&"src/caller_b.py"),
4649                "caller_b.py imports utils_b, should NOT be a caller of utils_a::helper. Got: {:?}",
4650                caller_files
4651            );
4652        }
4653
4654        // Ask for callers of utils_b's helper
4655        let callers = index
4656            .find_callers("helper", "src/utils_b.py")
4657            .await
4658            .unwrap();
4659        let caller_files: Vec<&str> = callers.iter().map(|(f, _, _, _)| f.as_str()).collect();
4660        assert!(
4661            caller_files.contains(&"src/caller_b.py"),
4662            "caller_b.py calls helper() (imports utils_b), must be a caller. Got: {:?}\nimports: {:?}\ncalls: {:?}",
4663            caller_files,
4664            import_resolution,
4665            call_resolution,
4666        );
4667        if imports_resolved {
4668            assert!(
4669                !caller_files.contains(&"src/caller_a.py"),
4670                "caller_a.py imports utils_a, should NOT be a caller of utils_b::helper. Got: {:?}",
4671                caller_files
4672            );
4673        }
4674    }
4675
4676    // =====================================================================
4677    // Per-file diagnostics storage tests
4678    // =====================================================================
4679
4680    /// Build a FileIndex on an empty tempdir for diagnostics-table tests.
4681    async fn empty_index(dir: &std::path::Path) -> FileIndex {
4682        FileIndex::open(&dir.join("index.sqlite"), dir)
4683            .await
4684            .unwrap()
4685    }
4686
4687    #[tokio::test]
4688    async fn per_file_save_upsert_and_delete_roundtrip() {
4689        let dir = tempdir().unwrap();
4690        let index = empty_index(dir.path()).await;
4691
4692        let upserts = vec![
4693            ("a.rs".to_string(), vec![1u8, 2, 3]),
4694            ("b.rs".to_string(), vec![4, 5, 6]),
4695        ];
4696        index
4697            .save_diagnostics_per_file(&upserts, &[], "h1")
4698            .await
4699            .unwrap();
4700
4701        let a = index.load_diagnostics_for_file("a.rs", "h1").await.unwrap();
4702        let b = index.load_diagnostics_for_file("b.rs", "h1").await.unwrap();
4703        assert_eq!(a, Some(vec![1, 2, 3]));
4704        assert_eq!(b, Some(vec![4, 5, 6]));
4705
4706        // Now delete a.rs and update b.rs in the same call.
4707        let upserts2 = vec![("b.rs".to_string(), vec![9, 9])];
4708        let deletes2 = vec!["a.rs".to_string()];
4709        index
4710            .save_diagnostics_per_file(&upserts2, &deletes2, "h1")
4711            .await
4712            .unwrap();
4713
4714        assert_eq!(
4715            index.load_diagnostics_for_file("a.rs", "h1").await.unwrap(),
4716            None
4717        );
4718        assert_eq!(
4719            index.load_diagnostics_for_file("b.rs", "h1").await.unwrap(),
4720            Some(vec![9, 9])
4721        );
4722    }
4723
4724    #[tokio::test]
4725    async fn per_file_save_empty_inputs_is_noop() {
4726        let dir = tempdir().unwrap();
4727        let index = empty_index(dir.path()).await;
4728        // No-op call should succeed and leave the table empty.
4729        index
4730            .save_diagnostics_per_file(&[], &[], "h")
4731            .await
4732            .unwrap();
4733        assert!(index.list_diagnostic_paths().await.unwrap().is_empty());
4734    }
4735
4736    #[tokio::test]
4737    async fn load_diagnostics_for_file_missing_returns_none() {
4738        let dir = tempdir().unwrap();
4739        let index = empty_index(dir.path()).await;
4740        assert_eq!(
4741            index
4742                .load_diagnostics_for_file("nope.rs", "h")
4743                .await
4744                .unwrap(),
4745            None
4746        );
4747    }
4748
4749    /// A row written under one config_hash must be invisible to a load that
4750    /// presents a different hash — this is what makes the cache safe across
4751    /// daemon restarts after a config edit.
4752    #[tokio::test]
4753    async fn per_file_config_hash_mismatch_is_cache_miss() {
4754        let dir = tempdir().unwrap();
4755        let index = empty_index(dir.path()).await;
4756        index
4757            .save_diagnostics_per_file(&[("a.rs".to_string(), vec![1])], &[], "old")
4758            .await
4759            .unwrap();
4760        // Same hash → hit.
4761        assert_eq!(
4762            index
4763                .load_diagnostics_for_file("a.rs", "old")
4764                .await
4765                .unwrap(),
4766            Some(vec![1])
4767        );
4768        // Different hash → miss.
4769        assert_eq!(
4770            index
4771                .load_diagnostics_for_file("a.rs", "new")
4772                .await
4773                .unwrap(),
4774            None
4775        );
4776        let multi = index
4777            .load_diagnostics_for_files(&["a.rs".to_string()], "new")
4778            .await
4779            .unwrap();
4780        assert!(multi.is_empty());
4781    }
4782
4783    /// Same invariant for the per-engine `daemon_diagnostics` table.
4784    #[tokio::test]
4785    async fn engine_blob_config_hash_mismatch_is_cache_miss() {
4786        let dir = tempdir().unwrap();
4787        let index = empty_index(dir.path()).await;
4788        index
4789            .save_diagnostics_blob("syntax", &[7, 8, 9], "old")
4790            .await
4791            .unwrap();
4792        assert_eq!(
4793            index.load_diagnostics_blob("syntax", "old").await.unwrap(),
4794            Some(vec![7, 8, 9])
4795        );
4796        assert_eq!(
4797            index.load_diagnostics_blob("syntax", "new").await.unwrap(),
4798            None
4799        );
4800    }
4801
4802    #[tokio::test]
4803    async fn load_diagnostics_for_files_skips_missing() {
4804        let dir = tempdir().unwrap();
4805        let index = empty_index(dir.path()).await;
4806        let upserts = vec![("a.rs".to_string(), vec![1]), ("c.rs".to_string(), vec![3])];
4807        index
4808            .save_diagnostics_per_file(&upserts, &[], "h1")
4809            .await
4810            .unwrap();
4811
4812        // Mix present + missing, in a non-canonical order.
4813        let query = vec![
4814            "c.rs".to_string(),
4815            "missing.rs".to_string(),
4816            "a.rs".to_string(),
4817        ];
4818        let mut got: Vec<(String, Vec<u8>)> = index
4819            .load_diagnostics_for_files(&query, "h1")
4820            .await
4821            .unwrap();
4822        got.sort_by(|x, y| x.0.cmp(&y.0));
4823        assert_eq!(
4824            got,
4825            vec![("a.rs".to_string(), vec![1]), ("c.rs".to_string(), vec![3]),]
4826        );
4827    }
4828
4829    #[tokio::test]
4830    async fn list_diagnostic_paths_returns_all() {
4831        let dir = tempdir().unwrap();
4832        let index = empty_index(dir.path()).await;
4833        let upserts = vec![
4834            ("x".to_string(), vec![0]),
4835            ("y".to_string(), vec![0]),
4836            ("z".to_string(), vec![0]),
4837        ];
4838        index
4839            .save_diagnostics_per_file(&upserts, &[], "h")
4840            .await
4841            .unwrap();
4842        let mut paths = index.list_diagnostic_paths().await.unwrap();
4843        paths.sort();
4844        assert_eq!(paths, vec!["x", "y", "z"]);
4845    }
4846
4847    /// Smoke test: a fresh open creates the per-file diagnostics table with the
4848    /// BLOB column type required by `save_diagnostics_per_file`. (A row inserted
4849    /// with the wrong column type by an older schema version would fail this
4850    /// roundtrip — the schema_version != SCHEMA_VERSION migration block at
4851    /// `FileIndex::open` is responsible for `DROP TABLE IF EXISTS
4852    /// daemon_diagnostics_per_file` so the new shape is created cleanly.)
4853    #[tokio::test]
4854    async fn fresh_open_per_file_table_accepts_blob_roundtrip() {
4855        let dir = tempdir().unwrap();
4856        let index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
4857            .await
4858            .unwrap();
4859        // The CREATE statement at FileIndex::open declares issues_blob BLOB NOT NULL.
4860        // Confirm the column type via PRAGMA table_info.
4861        let mut rows = index
4862            .conn
4863            .query("PRAGMA table_info(daemon_diagnostics_per_file)", ())
4864            .await
4865            .unwrap();
4866        let mut col_types: Vec<(String, String)> = Vec::new();
4867        while let Some(row) = rows.next().await.unwrap() {
4868            let name: String = row.get(1).unwrap();
4869            let ty: String = row.get(2).unwrap();
4870            col_types.push((name, ty));
4871        }
4872        let blob_col = col_types
4873            .iter()
4874            .find(|(n, _)| n == "issues_blob")
4875            .expect("issues_blob column missing");
4876        assert_eq!(
4877            blob_col.1.to_uppercase(),
4878            "BLOB",
4879            "issues_blob must be BLOB, got {:?}",
4880            blob_col.1
4881        );
4882
4883        // And the BLOB roundtrip itself works.
4884        index
4885            .save_diagnostics_per_file(&[("a".to_string(), vec![1, 2, 3])], &[], "h")
4886            .await
4887            .unwrap();
4888        assert_eq!(
4889            index.load_diagnostics_for_file("a", "h").await.unwrap(),
4890            Some(vec![1, 2, 3])
4891        );
4892    }
4893
4894    #[tokio::test]
4895    async fn invalidate_last_indexed_resets_needs_refresh_gate() {
4896        let dir = tempdir().unwrap();
4897        std::fs::write(dir.path().join("a.txt"), "x").unwrap();
4898        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
4899            .await
4900            .unwrap();
4901        index.refresh().await.unwrap();
4902        // Just-after-refresh, the 60-second gate suppresses needs_refresh.
4903        assert!(!index.needs_refresh().await);
4904        index.invalidate_last_indexed().await.unwrap();
4905        assert!(index.needs_refresh().await);
4906    }
4907}