Skip to main content

normalize_facts/
index.rs

1use crate::symbols::SymbolParser;
2use ignore::WalkBuilder;
3use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle};
4use libsql::{Connection, Database, params};
5pub use normalize_facts_core::IndexedFile;
6use normalize_facts_core::{FlatImport, FlatSymbol, TypeRef};
7use normalize_languages::support_for_path;
8use rayon::prelude::*;
9use std::path::{Path, PathBuf};
10use std::time::{SystemTime, UNIX_EPOCH};
11
/// A parsed symbol ready for database insertion.
///
/// Also embedded in [`CachedFileData`], so it is serialized with serde; adding,
/// removing or reordering fields changes that serialized payload.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names and from the `symbols`, `symbol_attributes` and `symbol_implements`
/// table columns — confirm against the extractor that fills this struct.
#[derive(serde::Serialize, serde::Deserialize)]
struct ParsedSymbol {
    /// Symbol name.
    name: String,
    /// Symbol kind, kept as free-form text.
    kind: String,
    /// Line where the symbol starts (line-number base not visible here).
    start_line: usize,
    /// Line where the symbol ends.
    end_line: usize,
    /// Name of the enclosing symbol, if any.
    parent: Option<String>,
    /// Visibility label; the `symbols` table defaults this column to `'public'`.
    visibility: String,
    /// Attributes attached to the symbol; presumably one `symbol_attributes` row each.
    attributes: Vec<String>,
    /// Presumably maps to the `symbols.is_impl` column — TODO confirm.
    is_interface_impl: bool,
    /// Interfaces/traits implemented; presumably one `symbol_implements` row each.
    implements: Vec<String>,
    /// Documentation text attached to the symbol, when present.
    docstring: Option<String>,
}
26
/// One call-site entry, as a positional tuple:
///
/// 0. `caller_symbol` — symbol containing the call
/// 1. `callee_name` — name being called
/// 2. `callee_qualifier` — optional qualifier of the callee
/// 3. `access` — optional access descriptor
/// 4. `line` — line of the call site
type CallEntry = (String, String, Option<String>, Option<String>, usize);
29
/// Parsed data for a single file, ready for database insertion.
///
/// Carries the same payload fields as [`CachedFileData`] plus the `file_path`
/// the data belongs to.
struct ParsedFileData {
    /// Path of the file this data was extracted from.
    file_path: String,
    /// Symbols defined in the file.
    symbols: Vec<ParsedSymbol>,
    /// Call sites found in the file (see [`CallEntry`] for the tuple layout).
    calls: Vec<CallEntry>,
    /// imports (for Python files only)
    // NOTE(review): the "Python only" restriction is not verifiable from this
    // section of the file — confirm it is still accurate.
    imports: Vec<FlatImport>,
    /// (type_name, method_name) for interface/class method signatures
    type_methods: Vec<(String, String)>,
    /// Type-to-type references (field types, param types, extends, etc.)
    type_refs: Vec<TypeRef>,
}
42
/// CA-cache payload: all extracted data for a single file, keyed by content hash.
/// Does not include `file_path` — that is the lookup key, not part of the payload.
///
/// Serialized with serde, so changing the fields changes the stored payload
/// shape. NOTE(review): presumably such a change should be paired with an
/// `EXTRACTOR_VERSION` bump so old entries are discarded — confirm.
#[derive(serde::Serialize, serde::Deserialize)]
struct CachedFileData {
    /// Symbols defined in the file.
    symbols: Vec<ParsedSymbol>,
    /// Call sites found in the file (see [`CallEntry`] for the tuple layout).
    calls: Vec<CallEntry>,
    /// Imports found in the file.
    imports: Vec<FlatImport>,
    /// `(type_name, method_name)` pairs.
    type_methods: Vec<(String, String)>,
    /// Type-to-type references.
    type_refs: Vec<TypeRef>,
}
53
// Not yet public - just delete .normalize/index.sqlite on schema changes
//
// Compared against the `schema_version` row of the `meta` table when the
// index is opened; on mismatch the indexed data is cleared and this value is
// written back.
const SCHEMA_VERSION: i64 = 12;
56
/// Bump when extraction logic changes to invalidate cached results.
///
/// Passed to the CA cache's `gc_stale_versions` when the index is opened.
///
/// Bumped to "2" (2026-04-27): purge CA cache entries that may have been poisoned
/// by the old bug where rebuilds without grammars loaded cached empty results.
const EXTRACTOR_VERSION: &str = "2";
61
62/// Check if a file path has a supported source extension.
63fn is_source_file(path: &str) -> bool {
64    normalize_languages::support_for_path(std::path::Path::new(path)).is_some()
65}
66
67/// Generate SQL WHERE clause for filtering source files.
68/// Returns: "path LIKE '%.py' OR path LIKE '%.rs' OR ..."
69fn source_extensions_sql_filter() -> String {
70    let mut extensions: Vec<&str> = normalize_languages::supported_languages()
71        .iter()
72        .flat_map(|lang| lang.extensions().iter().copied())
73        .collect();
74    extensions.sort_unstable();
75    extensions.dedup();
76    extensions
77        .iter()
78        .map(|ext| format!("path LIKE '%.{}'", ext))
79        .collect::<Vec<_>>()
80        .join(" OR ")
81}
82
/// Result from symbol search.
///
/// The fields mirror the `file`, `name`, `kind`, `start_line`, `end_line` and
/// `parent` columns of the `symbols` table.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SymbolMatch {
    /// Symbol name.
    pub name: String,
    /// Symbol kind, as free-form text.
    pub kind: String,
    /// File the symbol is defined in.
    pub file: String,
    /// Line where the symbol starts.
    pub start_line: usize,
    /// Line where the symbol ends.
    pub end_line: usize,
    /// Name of the enclosing symbol, if any.
    pub parent: Option<String>,
}
93
/// Files that changed since last index.
///
/// Produced by `FileIndex::get_changed_files`; every path is relative to the
/// index root.
#[derive(Debug, Default)]
pub struct ChangedFiles {
    /// Files present on disk but missing from the `files` table.
    pub added: Vec<String>,
    /// Files whose on-disk mtime is newer than the recorded mtime.
    pub modified: Vec<String>,
    /// Files recorded in the `files` table that the filesystem walk did not see.
    pub deleted: Vec<String>,
}
101
/// Call graph statistics.
///
/// NOTE(review): the code that fills these counters is outside this section;
/// presumably they are row counts of the same-named tables — confirm.
#[derive(Debug, Clone, Copy, Default)]
pub struct CallGraphStats {
    /// Number of symbols.
    pub symbols: usize,
    /// Number of calls.
    pub calls: usize,
    /// Number of imports.
    pub imports: usize,
}
109
/// Handle to the on-disk structural index: a libsql database plus the project
/// root it describes.
pub struct FileIndex {
    /// Connection used for every query issued by this type.
    conn: Connection,
    // NOTE(review): never read after construction; presumably held so the
    // database handle outlives `conn` — confirm before removing.
    #[allow(dead_code)]
    db: Database,
    /// Project root; indexed paths are stored relative to it.
    root: PathBuf,
    /// Whether progress output was requested via `set_progress`.
    progress: bool,
    /// Content-addressed extraction cache (optional; best-effort).
    ca_cache: Option<crate::ca_cache::CaCache>,
}
119
120impl FileIndex {
121    /// Open or create an index at the specified database path.
122    /// On corruption, automatically deletes and recreates the index.
123    ///
124    /// # Arguments
125    /// * `db_path` - Path to the SQLite database file
126    /// * `root` - Project root directory (used for file walking during refresh)
127    pub async fn open(db_path: &Path, root: &Path) -> Result<Self, libsql::Error> {
128        // Ensure parent directory exists
129        if let Some(parent) = db_path.parent()
130            && let Err(e) = std::fs::create_dir_all(parent)
131        {
132            tracing::warn!(
133                "normalize-facts: failed to create index directory {:?}: {}",
134                parent,
135                e
136            );
137        }
138
139        // Try to open, with recovery on corruption
140        match Self::try_open(db_path, root).await {
141            Ok(idx) => Ok(idx),
142            Err(e) => {
143                // Check for corruption-like errors
144                let err_str = e.to_string().to_lowercase();
145                let is_corruption = err_str.contains("corrupt")
146                    || err_str.contains("malformed")
147                    || err_str.contains("disk i/o error")
148                    || err_str.contains("not a database")
149                    || err_str.contains("database disk image")
150                    || err_str.contains("integrity check failed");
151
152                if is_corruption {
153                    tracing::warn!("Index corrupted, rebuilding: {}", e);
154                    // Delete corrupted database and retry
155                    let _ = std::fs::remove_file(db_path);
156                    // Also remove journal/wal files if they exist
157                    let _ = std::fs::remove_file(db_path.with_extension("sqlite-journal"));
158                    let _ = std::fs::remove_file(db_path.with_extension("sqlite-wal"));
159                    let _ = std::fs::remove_file(db_path.with_extension("sqlite-shm"));
160                    Self::try_open(db_path, root).await
161                } else {
162                    Err(e)
163                }
164            }
165        }
166    }
167
    /// Internal: try to open database without recovery.
    ///
    /// Steps, in order:
    /// 1. Open the local libsql database and run `PRAGMA quick_check(1)`; any
    ///    result other than `"ok"` is returned as a `SqliteFailure` whose message
    ///    contains "integrity check failed" (the text `open` looks for).
    /// 2. Create every table and index with `IF NOT EXISTS`.
    /// 3. Run tolerant `ALTER TABLE ... ADD COLUMN` migrations (errors ignored).
    /// 4. If the stored `schema_version` differs from `SCHEMA_VERSION`, clear the
    ///    indexed data, drop the diagnostics tables and store the new version.
    /// 5. Create the convenience views, the co-change table and the diagnostics
    ///    tables.
    /// 6. Open the content-addressed cache on a best-effort basis.
    ///
    /// # Errors
    /// Any statement awaited with `?` propagates its `libsql::Error`; statements
    /// followed by `.ok()` are deliberately best-effort.
    async fn try_open(db_path: &Path, root: &Path) -> Result<Self, libsql::Error> {
        let db = libsql::Builder::new_local(db_path).build().await?;
        let conn = db.connect()?;

        // Quick integrity check - this will catch most corruption
        // PRAGMA quick_check is faster than full integrity_check
        let mut rows = conn.query("PRAGMA quick_check(1)", ()).await?;
        // A missing row or an unreadable column is treated the same as a failed check.
        let integrity: String = if let Some(row) = rows.next().await? {
            row.get(0).unwrap_or_else(|_| "error".to_string())
        } else {
            "error".to_string()
        };
        if integrity != "ok" {
            return Err(libsql::Error::SqliteFailure(
                11, // SQLITE_CORRUPT
                format!("Database integrity check failed: {}", integrity),
            ));
        }

        // Initialize schema
        // `meta` is a key/value store; this function reads and writes its
        // 'schema_version' row, and other methods use 'last_indexed'.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS meta (
                key TEXT PRIMARY KEY,
                value TEXT
            )",
            (),
        )
        .await?;
        // One row per indexed path, relative to the project root.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS files (
                path TEXT PRIMARY KEY,
                is_dir INTEGER NOT NULL,
                mtime INTEGER NOT NULL,
                lines INTEGER NOT NULL DEFAULT 0
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_files_name ON files(path)",
            (),
        )
        .await?;

        // Call graph for fast caller/callee lookups
        conn.execute(
            "CREATE TABLE IF NOT EXISTS calls (
                caller_file TEXT NOT NULL,
                caller_symbol TEXT NOT NULL,
                callee_name TEXT NOT NULL,
                callee_qualifier TEXT,
                callee_resolved_file TEXT,
                line INTEGER NOT NULL,
                access TEXT
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_calls_callee ON calls(callee_name)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_calls_caller ON calls(caller_file, caller_symbol)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_calls_qualifier ON calls(callee_qualifier)",
            (),
        )
        .await?;
        // May fail on old DBs where the column doesn't exist yet; migration below adds it.
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_calls_resolved ON calls(callee_resolved_file)",
            (),
        )
        .await
        .ok();

        // Symbol definitions
        conn.execute(
            "CREATE TABLE IF NOT EXISTS symbols (
                file TEXT NOT NULL,
                name TEXT NOT NULL,
                kind TEXT NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                parent TEXT,
                visibility TEXT NOT NULL DEFAULT 'public',
                is_impl INTEGER NOT NULL DEFAULT 0
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file)",
            (),
        )
        .await?;

        // Symbol attributes (one row per attribute per symbol)
        conn.execute(
            "CREATE TABLE IF NOT EXISTS symbol_attributes (
                file TEXT NOT NULL,
                name TEXT NOT NULL,
                attribute TEXT NOT NULL
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbol_attributes_file_name ON symbol_attributes(file, name)",
            (),
        )
        .await?;

        // Symbol implements (one row per interface/trait per symbol)
        conn.execute(
            "CREATE TABLE IF NOT EXISTS symbol_implements (
                file TEXT NOT NULL,
                name TEXT NOT NULL,
                interface TEXT NOT NULL
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_symbol_implements_file_name ON symbol_implements(file, name)",
            (),
        )
        .await?;

        // Import tracking
        conn.execute(
            "CREATE TABLE IF NOT EXISTS imports (
                file TEXT NOT NULL,
                module TEXT,
                name TEXT NOT NULL,
                alias TEXT,
                line INTEGER NOT NULL,
                resolved_file TEXT,
                is_reexport INTEGER NOT NULL DEFAULT 0
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_imports_file ON imports(file)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_imports_name ON imports(name)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_imports_module ON imports(module)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_imports_resolved ON imports(resolved_file)",
            (),
        )
        .await?;

        // Type method signatures
        conn.execute(
            "CREATE TABLE IF NOT EXISTS type_methods (
                file TEXT NOT NULL,
                type_name TEXT NOT NULL,
                method_name TEXT NOT NULL,
                PRIMARY KEY (file, type_name, method_name)
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_type_methods_type ON type_methods(type_name)",
            (),
        )
        .await?;

        // Type references (type-to-type dependencies)
        conn.execute(
            "CREATE TABLE IF NOT EXISTS type_refs (
                file TEXT NOT NULL,
                source_symbol TEXT NOT NULL,
                target_type TEXT NOT NULL,
                kind TEXT NOT NULL,
                line INTEGER NOT NULL
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_type_refs_file ON type_refs(file)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_type_refs_source ON type_refs(source_symbol)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_type_refs_target ON type_refs(target_type)",
            (),
        )
        .await?;

        // Migrate existing tables: add columns that may be missing from older schemas.
        // SQLite errors on duplicate ADD COLUMN, so we ignore failures.
        conn.execute(
            "ALTER TABLE symbols ADD COLUMN visibility TEXT NOT NULL DEFAULT 'public'",
            (),
        )
        .await
        .ok();
        conn.execute(
            "ALTER TABLE symbols ADD COLUMN is_impl INTEGER NOT NULL DEFAULT 0",
            (),
        )
        .await
        .ok();
        // resolved_file was added to imports after schema version 5 was already set;
        // run unconditionally so existing v5 DBs without the column get migrated.
        conn.execute("ALTER TABLE imports ADD COLUMN resolved_file TEXT", ())
            .await
            .ok();

        // Check schema version
        let mut rows = conn
            .query(
                "SELECT CAST(value AS INTEGER) FROM meta WHERE key = 'schema_version'",
                (),
            )
            .await?;
        // No row (fresh database) or an unreadable value counts as version 0,
        // which never equals SCHEMA_VERSION and therefore triggers the reset below.
        let version: i64 = if let Some(row) = rows.next().await? {
            row.get(0).unwrap_or(0)
        } else {
            0
        };

        if version != SCHEMA_VERSION {
            // Reset on schema change
            conn.execute("DELETE FROM files", ()).await?;
            conn.execute("DELETE FROM calls", ()).await?;
            conn.execute("DELETE FROM symbols", ()).await?;
            conn.execute("DELETE FROM imports", ()).await?;
            // Add new columns that may not exist in older schema versions.
            // Use .ok() to tolerate "duplicate column" errors on already-migrated DBs.
            conn.execute("ALTER TABLE imports ADD COLUMN resolved_file TEXT", ())
                .await
                .ok(); // ignore "duplicate column" error on fresh DBs
            conn.execute(
                "ALTER TABLE imports ADD COLUMN is_reexport INTEGER NOT NULL DEFAULT 0",
                (),
            )
            .await
            .ok(); // ignore "duplicate column" error on fresh DBs
            conn.execute("ALTER TABLE calls ADD COLUMN callee_resolved_file TEXT", ())
                .await
                .ok(); // ignore "duplicate column" error on fresh DBs
            conn.execute("ALTER TABLE calls ADD COLUMN access TEXT", ())
                .await
                .ok(); // ignore "duplicate column" error on fresh DBs
            // The tolerant CREATE INDEX earlier may have failed on an old DB;
            // now that the column is guaranteed to exist this one must succeed.
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_calls_resolved ON calls(callee_resolved_file)",
                (),
            )
            .await?;
            conn.execute("DELETE FROM type_methods", ()).await?;
            conn.execute("DELETE FROM type_refs", ()).await?;
            conn.execute("DELETE FROM symbol_attributes", ()).await?;
            conn.execute("DELETE FROM symbol_implements", ()).await?;
            // co_change_edges: clear on schema bump so the next rebuild repopulates.
            // The table is only created further down, so on a fresh database this
            // DELETE fails and the failure is ignored.
            conn.execute("DELETE FROM co_change_edges", ()).await.ok();
            conn.execute("DELETE FROM meta WHERE key = 'co_change_last_commit'", ())
                .await
                .ok();
            // Both diagnostic tables get dropped + recreated on every schema bump
            // (column shape has changed in past bumps and may again — simplest path).
            // The recreation happens in the CREATE TABLE statements near the end
            // of this function.
            conn.execute("DROP TABLE IF EXISTS daemon_diagnostics", ())
                .await
                .ok();
            conn.execute("DROP TABLE IF EXISTS daemon_diagnostics_per_file", ())
                .await
                .ok();
            // Record the new version last, so a failure above leaves the old
            // version in place and the reset runs again on the next open.
            conn.execute(
                "INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', ?1)",
                params![SCHEMA_VERSION.to_string()],
            )
            .await?;
        }

        // Create convenience views for agent queries.
        // These are idempotent (CREATE VIEW IF NOT EXISTS) and safe to run on every open.

        // entry_points: public symbols that are never called internally.
        // Identifies API surface that external callers enter through — functions/types
        // that are exported but have no recorded callers within the indexed codebase.
        // Useful for finding dead public API candidates and top-level entry symbols.
        // Note: the match is by name only (`callee_name = s.name`), not by file.
        conn.execute(
            "CREATE VIEW IF NOT EXISTS entry_points AS
             SELECT s.file, s.name, s.kind, s.start_line, s.end_line
             FROM symbols s
             WHERE s.visibility = 'public'
               AND NOT EXISTS (
                   SELECT 1 FROM calls c WHERE c.callee_name = s.name
               )",
            (),
        )
        .await
        .ok();

        // external_deps: imports whose module specifier could not be resolved to a
        // file within the indexed root (resolved_file IS NULL). These represent
        // third-party packages, stdlib imports, or imports outside the project root.
        // Used to distinguish in-project edges from external dependencies in analysis.
        conn.execute(
            "CREATE VIEW IF NOT EXISTS external_deps AS
             SELECT file, module, name, alias, line
             FROM imports
             WHERE resolved_file IS NULL",
            (),
        )
        .await
        .ok();

        // external_surface: public symbols that are called by files whose own imports
        // include at least one unresolved (external) dependency.
        // Identifies the boundary between internal implementation and externally-facing
        // API — the symbols that external-dependency-using files actually invoke.
        // Depends on the external_deps view created just above.
        conn.execute(
            "CREATE VIEW IF NOT EXISTS external_surface AS
             SELECT DISTINCT s.file, s.name, s.kind, s.start_line, s.end_line
             FROM symbols s
             WHERE s.visibility = 'public'
               AND EXISTS (
                   SELECT 1 FROM calls c
                   WHERE c.callee_name = s.name
                     AND EXISTS (
                         SELECT 1 FROM external_deps ed WHERE ed.file = c.caller_file
                     )
               )",
            (),
        )
        .await
        .ok();

        // Co-change edges: file pairs that appear together in commits.
        // Populated by rebuild_co_change_edges(); queried by coupling-clusters.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS co_change_edges (
                file_a TEXT NOT NULL,
                file_b TEXT NOT NULL,
                count INTEGER NOT NULL,
                PRIMARY KEY (file_a, file_b)
            )",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_co_change_file_a ON co_change_edges(file_a)",
            (),
        )
        .await?;
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_co_change_file_b ON co_change_edges(file_b)",
            (),
        )
        .await?;

        // Daemon diagnostics cache: one row per engine. `config_hash` mismatch on load = cache miss.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS daemon_diagnostics (
                engine TEXT PRIMARY KEY,
                issues_blob BLOB NOT NULL,
                config_hash TEXT NOT NULL,
                updated_at INTEGER NOT NULL
            )",
            (),
        )
        .await?;

        // Per-file diagnostics cache: one row per file that currently has issues.
        // "No row" semantics — files with zero issues are absent from the table.
        // Used by the daemon to serve per-file `RunRules` queries directly without
        // touching the "all" blob.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS daemon_diagnostics_per_file (
                path TEXT PRIMARY KEY,
                issues_blob BLOB NOT NULL,
                config_hash TEXT NOT NULL,
                updated_at INTEGER NOT NULL
            )",
            (),
        )
        .await?;

        // Open CA cache (best-effort — a failure here is non-fatal)
        let ca_cache = match crate::ca_cache::CaCache::open(
            &crate::ca_cache::CaCache::default_path(),
            1024 * 1024 * 1024, // 1 GiB limit
        ) {
            Ok(c) => {
                // GC stale versions at startup (best-effort)
                if let Err(e) = c.gc_stale_versions(EXTRACTOR_VERSION) {
                    tracing::warn!("normalize-facts: CA cache GC error: {}", e);
                }
                Some(c)
            }
            Err(e) => {
                tracing::warn!("normalize-facts: failed to open CA cache: {}", e);
                None
            }
        };

        // Progress output starts disabled; callers opt in via `set_progress`.
        Ok(Self {
            conn,
            db,
            root: root.to_path_buf(),
            progress: false,
            ca_cache,
        })
    }
604
605    /// Enable progress bar output for long-running operations (refresh, call graph).
606    /// Only shows bars when stderr is a terminal.
607    pub fn set_progress(&mut self, enabled: bool) {
608        self.progress = enabled;
609    }
610
611    /// Get a reference to the underlying SQLite connection for direct queries
612    pub fn connection(&self) -> &Connection {
613        &self.conn
614    }
615
616    /// Get files that have changed since last index
617    pub async fn get_changed_files(&self) -> Result<ChangedFiles, libsql::Error> {
618        let mut result = ChangedFiles::default();
619
620        // Get all indexed files with their mtimes
621        let mut indexed: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
622        {
623            let mut rows = self
624                .conn
625                .query("SELECT path, mtime FROM files WHERE is_dir = 0", ())
626                .await?;
627            while let Some(row) = rows.next().await? {
628                let path: String = row.get(0)?;
629                let mtime: i64 = row.get(1)?;
630                indexed.insert(path, mtime);
631            }
632        }
633
634        // Walk current filesystem
635        let walker = WalkBuilder::new(&self.root)
636            .hidden(false)
637            .git_ignore(true)
638            .git_global(true)
639            .git_exclude(true)
640            .build();
641
642        let mut seen = std::collections::HashSet::new();
643        for entry in walker.flatten() {
644            let path = entry.path();
645            if path.is_dir() {
646                continue;
647            }
648            if let Ok(rel) = path.strip_prefix(&self.root) {
649                let rel_str = rel.to_string_lossy().to_string();
650                // Skip internal directories
651                if rel_str.is_empty() || rel_str == ".git" || rel_str.starts_with(".git/") {
652                    continue;
653                }
654                seen.insert(rel_str.clone());
655
656                let current_mtime = path
657                    .metadata()
658                    .ok()
659                    .and_then(|m| m.modified().ok())
660                    .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
661                    .map(|d| d.as_secs() as i64)
662                    .unwrap_or(0);
663
664                if let Some(&indexed_mtime) = indexed.get(&rel_str) {
665                    if current_mtime > indexed_mtime {
666                        result.modified.push(rel_str);
667                    }
668                } else {
669                    result.added.push(rel_str);
670                }
671            }
672        }
673
674        // Find deleted files
675        for path in indexed.keys() {
676            if !seen.contains(path) {
677                result.deleted.push(path.clone());
678            }
679        }
680
681        Ok(result)
682    }
683
684    /// Check if refresh is needed using fast heuristics.
685    /// Returns true if changes are likely.
686    async fn needs_refresh(&self) -> bool {
687        let mut rows = match self
688            .conn
689            .query(
690                "SELECT CAST(value AS INTEGER) FROM meta WHERE key = 'last_indexed'",
691                (),
692            )
693            .await
694        {
695            Ok(r) => r,
696            Err(_) => return true,
697        };
698        let last_indexed: i64 = match rows.next().await {
699            Ok(Some(row)) => row.get(0).unwrap_or(0),
700            _ => 0,
701        };
702
703        // Never indexed
704        if last_indexed == 0 {
705            return true;
706        }
707
708        let now = SystemTime::now()
709            .duration_since(UNIX_EPOCH)
710            .map(|d| d.as_secs() as i64)
711            .unwrap_or(0);
712
713        // Allow 60s staleness - don't check on every call
714        if now - last_indexed < 60 {
715            return false;
716        }
717
718        // Check mtimes of top-level entries (catches new/deleted files)
719        if let Ok(entries) = std::fs::read_dir(&self.root) {
720            for entry in entries.flatten() {
721                let name = entry.file_name();
722                let name_str = name.to_string_lossy();
723                if name_str.starts_with('.') {
724                    continue;
725                }
726                if let Ok(meta) = entry.metadata()
727                    && let Ok(mtime) = meta.modified()
728                {
729                    let mtime_secs = mtime
730                        .duration_since(UNIX_EPOCH)
731                        .map(|d| d.as_secs() as i64)
732                        .unwrap_or(0);
733                    if mtime_secs > last_indexed {
734                        return true;
735                    }
736                }
737            }
738        }
739
740        // Sample some indexed files to catch modifications
741        // Check ~100 files spread across the index
742        if let Ok(mut rows) = self
743            .conn
744            .query(
745                "SELECT path, mtime FROM files WHERE is_dir = 0 ORDER BY RANDOM() LIMIT 100",
746                (),
747            )
748            .await
749        {
750            while let Ok(Some(row)) = rows.next().await {
751                let path: String = match row.get(0) {
752                    Ok(p) => p,
753                    Err(_) => continue,
754                };
755                let indexed_mtime: i64 = match row.get(1) {
756                    Ok(m) => m,
757                    Err(_) => continue,
758                };
759                let full_path = self.root.join(&path);
760                if let Ok(meta) = full_path.metadata()
761                    && let Ok(mtime) = meta.modified()
762                {
763                    let current_mtime = mtime
764                        .duration_since(UNIX_EPOCH)
765                        .map(|d| d.as_secs() as i64)
766                        .unwrap_or(0);
767                    if current_mtime > indexed_mtime {
768                        return true;
769                    }
770                }
771            }
772        }
773
774        false
775    }
776
777    /// Test/maintenance helper: clear the `last_indexed` meta value so the next
778    /// `needs_refresh()` returns `true` regardless of the 60-second debounce.
779    ///
780    /// Used by integration tests that need to force refresh after each file
781    /// edit without waiting for the staleness window.
782    pub async fn invalidate_last_indexed(&self) -> Result<(), libsql::Error> {
783        self.conn
784            .execute("DELETE FROM meta WHERE key = 'last_indexed'", ())
785            .await?;
786        Ok(())
787    }
788
789    /// Refresh only files that have changed (faster than full refresh).
790    /// Returns the list of changed file paths (absolute) that were added, modified, or deleted.
791    /// The count can be derived from `.len()`.
792    pub async fn incremental_refresh(&mut self) -> Result<Vec<PathBuf>, libsql::Error> {
793        if !self.needs_refresh().await {
794            return Ok(Vec::new());
795        }
796        self.incremental_refresh_force().await
797    }
798
799    /// Refresh only files that have changed, bypassing the `needs_refresh()`
800    /// staleness gate.
801    ///
802    /// `incremental_refresh()` short-circuits if the index was refreshed within
803    /// the last 60 seconds and no top-level mtime changes are visible — a cheap
804    /// "probably nothing changed" heuristic for cold-CLI callers running many
805    /// commands in quick succession. For an event-driven daemon, the watcher
806    /// firing **is** the signal that something changed, so the gate is wrong.
807    /// Daemons should call this variant.
808    pub async fn incremental_refresh_force(&mut self) -> Result<Vec<PathBuf>, libsql::Error> {
809        let changed = self.get_changed_files().await?;
810        let total_changes = changed.added.len() + changed.modified.len() + changed.deleted.len();
811
812        if total_changes == 0 {
813            return Ok(Vec::new());
814        }
815
816        self.conn.execute("BEGIN", ()).await?;
817
818        // Delete removed files
819        for path in &changed.deleted {
820            self.conn
821                .execute("DELETE FROM files WHERE path = ?1", params![path.clone()])
822                .await?;
823        }
824
825        // Update/insert changed files
826        for path in changed.added.iter().chain(changed.modified.iter()) {
827            let full_path = self.root.join(path);
828            let is_dir = full_path.is_dir();
829            let mtime = full_path
830                .metadata()
831                .ok()
832                .and_then(|m| m.modified().ok())
833                .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
834                .map(|d| d.as_secs() as i64)
835                .unwrap_or(0);
836            // Count lines for text files (binary files will fail read_to_string and get 0)
837            let lines = if is_dir {
838                0
839            } else {
840                std::fs::read_to_string(&full_path)
841                    .map(|s| s.lines().count())
842                    .unwrap_or(0)
843            };
844
845            self.conn.execute(
846                "INSERT OR REPLACE INTO files (path, is_dir, mtime, lines) VALUES (?1, ?2, ?3, ?4)",
847                params![path.clone(), is_dir as i64, mtime, lines as i64],
848            ).await?;
849        }
850
851        // Update last indexed time
852        let now = SystemTime::now()
853            .duration_since(UNIX_EPOCH)
854            .map(|d| d.as_secs() as i64)
855            .unwrap_or(0);
856        self.conn
857            .execute(
858                "INSERT OR REPLACE INTO meta (key, value) VALUES ('last_indexed', ?1)",
859                params![now.to_string()],
860            )
861            .await?;
862
863        self.conn.execute("COMMIT", ()).await?;
864
865        // Collect all changed paths as absolute PathBufs
866        let all_changed: Vec<PathBuf> = changed
867            .added
868            .iter()
869            .chain(changed.modified.iter())
870            .chain(changed.deleted.iter())
871            .map(|p| self.root.join(p))
872            .collect();
873
874        Ok(all_changed)
875    }
876
877    /// Execute a raw SQL statement (for maintenance operations).
878    pub async fn execute(&self, sql: &str) -> Result<u64, libsql::Error> {
879        self.conn.execute(sql, ()).await
880    }
881
882    /// Run an arbitrary read-only SQL query and return results as a list of row maps.
883    ///
884    /// Each row is a `serde_json::Map` from column name to value.
885    /// Useful for agent-driven exploration of the structural index.
886    pub async fn raw_query(
887        &self,
888        sql: &str,
889    ) -> Result<Vec<serde_json::Map<String, serde_json::Value>>, libsql::Error> {
890        let mut rows = self.conn.query(sql, ()).await?;
891        let mut result = Vec::new();
892        while let Some(row) = rows.next().await? {
893            let col_count = row.column_count();
894            let mut map = serde_json::Map::new();
895            for i in 0..col_count {
896                let col_name = row.column_name(i).unwrap_or("?").to_string();
897                let value = match row.get_value(i)? {
898                    libsql::Value::Null => serde_json::Value::Null,
899                    libsql::Value::Integer(n) => serde_json::Value::Number(n.into()),
900                    libsql::Value::Real(f) => serde_json::json!(f),
901                    libsql::Value::Text(s) => serde_json::Value::String(s),
902                    libsql::Value::Blob(b) => {
903                        serde_json::Value::String(format!("<blob {} bytes>", b.len()))
904                    }
905                };
906                map.insert(col_name, value);
907            }
908            result.push(map);
909        }
910        Ok(result)
911    }
912
913    /// Refresh the index by walking the filesystem
914    pub async fn refresh(&mut self) -> Result<usize, libsql::Error> {
915        let walker = WalkBuilder::new(&self.root)
916            .hidden(false)
917            .git_ignore(true)
918            .git_global(true)
919            .git_exclude(true)
920            .build();
921
922        self.conn.execute("BEGIN", ()).await?;
923
924        // Clear existing files
925        self.conn.execute("DELETE FROM files", ()).await?;
926
927        let pb = if self.progress && std::io::IsTerminal::is_terminal(&std::io::stderr()) {
928            let pb = ProgressBar::new_spinner();
929            pb.set_style(
930                ProgressStyle::with_template("{spinner:.cyan} {msg} [{elapsed_precise}]")
931                    .unwrap_or_else(|_| ProgressStyle::default_spinner()),
932            );
933            pb.set_message("Scanning files...");
934            pb
935        } else {
936            ProgressBar::hidden()
937        };
938
939        let mut count = 0;
940        for entry in walker.flatten() {
941            let path = entry.path();
942            if let Ok(rel) = path.strip_prefix(&self.root) {
943                let rel_str = rel.to_string_lossy().to_string();
944                // Skip internal directories
945                if rel_str.is_empty() || rel_str == ".git" || rel_str.starts_with(".git/") {
946                    continue;
947                }
948
949                let is_dir = path.is_dir();
950                let mtime = path
951                    .metadata()
952                    .ok()
953                    .and_then(|m| m.modified().ok())
954                    .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
955                    .map(|d| d.as_secs() as i64)
956                    .unwrap_or(0);
957                // Count lines for text files (binary files will fail read_to_string and get 0)
958                let lines = if is_dir {
959                    0
960                } else {
961                    std::fs::read_to_string(path)
962                        .map(|s| s.lines().count())
963                        .unwrap_or(0)
964                };
965
966                self.conn
967                    .execute(
968                        "INSERT INTO files (path, is_dir, mtime, lines) VALUES (?1, ?2, ?3, ?4)",
969                        params![rel_str, is_dir as i64, mtime, lines as i64],
970                    )
971                    .await?;
972                count += 1;
973                pb.set_message(format!("Scanning files... {count}"));
974                pb.tick();
975            }
976        }
977
978        pb.finish_and_clear();
979
980        // Update last indexed time
981        let now = SystemTime::now()
982            .duration_since(UNIX_EPOCH)
983            .map(|d| d.as_secs() as i64)
984            .unwrap_or(0);
985        self.conn
986            .execute(
987                "INSERT OR REPLACE INTO meta (key, value) VALUES ('last_indexed', ?1)",
988                params![now.to_string()],
989            )
990            .await?;
991
992        self.conn.execute("COMMIT", ()).await?;
993
994        Ok(count)
995    }
996
997    /// Get all files from the index
998    pub async fn all_files(&self) -> Result<Vec<IndexedFile>, libsql::Error> {
999        let mut rows = self
1000            .conn
1001            .query("SELECT path, is_dir, mtime, lines FROM files", ())
1002            .await?;
1003        let mut files = Vec::new();
1004        while let Some(row) = rows.next().await? {
1005            files.push(IndexedFile {
1006                path: row.get(0)?,
1007                is_dir: row.get::<i64>(1)? != 0,
1008                mtime: row.get(2)?,
1009                lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1010            });
1011        }
1012        Ok(files)
1013    }
1014
1015    /// Search files by exact name match
1016    pub async fn find_by_name(&self, name: &str) -> Result<Vec<IndexedFile>, libsql::Error> {
1017        let pattern = format!("%/{}", name);
1018        let mut rows = self
1019            .conn
1020            .query(
1021                "SELECT path, is_dir, mtime, lines FROM files WHERE path LIKE ?1 OR path = ?2",
1022                params![pattern, name],
1023            )
1024            .await?;
1025        let mut files = Vec::new();
1026        while let Some(row) = rows.next().await? {
1027            files.push(IndexedFile {
1028                path: row.get(0)?,
1029                is_dir: row.get::<i64>(1)? != 0,
1030                mtime: row.get(2)?,
1031                lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1032            });
1033        }
1034        Ok(files)
1035    }
1036
1037    /// Search files by stem (filename without extension)
1038    pub async fn find_by_stem(&self, stem: &str) -> Result<Vec<IndexedFile>, libsql::Error> {
1039        let pattern = format!("%/{}%", stem);
1040        let mut rows = self
1041            .conn
1042            .query(
1043                "SELECT path, is_dir, mtime, lines FROM files WHERE path LIKE ?1",
1044                params![pattern],
1045            )
1046            .await?;
1047        let mut files = Vec::new();
1048        while let Some(row) = rows.next().await? {
1049            files.push(IndexedFile {
1050                path: row.get(0)?,
1051                is_dir: row.get::<i64>(1)? != 0,
1052                mtime: row.get(2)?,
1053                lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1054            });
1055        }
1056        Ok(files)
1057    }
1058
1059    /// Count indexed files
1060    pub async fn count(&self) -> Result<usize, libsql::Error> {
1061        let mut rows = self.conn.query("SELECT COUNT(*) FROM files", ()).await?;
1062        if let Some(row) = rows.next().await? {
1063            Ok(u64::try_from(row.get::<i64>(0)?).unwrap_or(0) as usize)
1064        } else {
1065            Ok(0)
1066        }
1067    }
1068
1069    /// Index symbols and call graph for a file
1070    #[allow(dead_code)] // FileIndex API - used by daemon
1071    pub async fn index_file_symbols(
1072        &self,
1073        path: &str,
1074        symbols: &[FlatSymbol],
1075        calls: &[(String, String, usize)],
1076    ) -> Result<(), libsql::Error> {
1077        // Insert symbols
1078        for sym in symbols {
1079            self.conn.execute(
1080                "INSERT INTO symbols (file, name, kind, start_line, end_line, parent, visibility, is_impl) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
1081                params![path.to_string(), sym.name.clone(), sym.kind.as_str(), sym.start_line as i64, sym.end_line as i64, sym.parent.clone(), sym.visibility.as_str(), sym.is_interface_impl as i64],
1082            ).await?;
1083            for attr in &sym.attributes {
1084                self.conn
1085                    .execute(
1086                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
1087                        params![path.to_string(), sym.name.clone(), attr.clone()],
1088                    )
1089                    .await?;
1090            }
1091            if let Some(doc) = &sym.docstring {
1092                self.conn
1093                    .execute(
1094                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
1095                        params![path.to_string(), sym.name.clone(), format!("doc:{doc}")],
1096                    )
1097                    .await?;
1098            }
1099            for iface in &sym.implements {
1100                self.conn
1101                    .execute(
1102                        "INSERT INTO symbol_implements (file, name, interface) VALUES (?1, ?2, ?3)",
1103                        params![path.to_string(), sym.name.clone(), iface.clone()],
1104                    )
1105                    .await?;
1106            }
1107        }
1108
1109        // Insert calls (caller_symbol, callee_name, line)
1110        for (caller_symbol, callee_name, line) in calls {
1111            self.conn.execute(
1112                "INSERT INTO calls (caller_file, caller_symbol, callee_name, line) VALUES (?1, ?2, ?3, ?4)",
1113                params![path.to_string(), caller_symbol.clone(), callee_name.clone(), *line as i64],
1114            ).await?;
1115        }
1116
1117        Ok(())
1118    }
1119
1120    /// Find callers of a specific symbol definition (from call graph).
1121    ///
1122    /// `def_file` is the file that contains the definition being searched. Results are
1123    /// restricted to files that are `def_file` itself (self-recursive calls) or that
1124    /// explicitly import the symbol. This prevents false positives from unrelated
1125    /// functions with the same name in other modules.
1126    ///
1127    /// Resolves through imports: if file A imports X as Y and calls Y(), it is found
1128    /// as a caller of X. Also handles qualified calls (`foo.bar()`) and `self.method()`
1129    /// resolved to the containing class.
1130    pub async fn find_callers(
1131        &self,
1132        symbol_name: &str,
1133        def_file: &str,
1134    ) -> Result<Vec<(String, String, usize, Option<String>)>, libsql::Error> {
1135        // Handle Class.method format - split and search for method within class
1136        let (class_filter, method_name) = if symbol_name.contains('.') {
1137            let parts: Vec<&str> = symbol_name.splitn(2, '.').collect();
1138            (Some(parts[0]), parts[1])
1139        } else {
1140            (None, symbol_name)
1141        };
1142
1143        // If searching for Class.method, find callers that call self.method within that class
1144        if let Some(class_name) = class_filter {
1145            let mut rows = self
1146                .conn
1147                .query(
1148                    "SELECT c.caller_file, c.caller_symbol, c.line, c.access
1149                 FROM calls c
1150                 JOIN symbols s ON c.caller_file = s.file AND c.caller_symbol = s.name
1151                 WHERE c.callee_name = ?1 AND c.callee_qualifier = 'self' AND s.parent = ?2",
1152                    params![method_name, class_name],
1153                )
1154                .await?;
1155            let mut callers = Vec::new();
1156            while let Some(row) = rows.next().await? {
1157                callers.push((
1158                    row.get(0)?,
1159                    row.get(1)?,
1160                    u64::try_from(row.get::<i64>(2)?).unwrap_or(0) as usize,
1161                    row.get::<Option<String>>(3)?,
1162                ));
1163            }
1164
1165            if !callers.is_empty() {
1166                return Ok(callers);
1167            }
1168        }
1169
1170        // Use callee_resolved_file when available for precise call resolution.
1171        // Falls back to import-based matching when callee_resolved_file is NULL
1172        // (external packages, unresolved modules).
1173        //
1174        // Branch 1: callee_resolved_file = def_file (precise match)
1175        // Branch 2: Same-file calls (caller_file = def_file, no qualifier)
1176        // Branch 3: Import-based fallback for unresolved calls (callee_resolved_file IS NULL)
1177        // Branch 4: self.method() calls within a class
1178        let mut rows = self.conn.query(
1179            "SELECT caller_file, caller_symbol, line, access FROM calls
1180             WHERE callee_name = ?1 AND callee_resolved_file = ?2
1181             UNION
1182             SELECT caller_file, caller_symbol, line, access FROM calls
1183             WHERE callee_name = ?1 AND caller_file = ?2
1184               AND callee_resolved_file IS NULL AND callee_qualifier IS NULL
1185             UNION
1186             SELECT c.caller_file, c.caller_symbol, c.line, c.access
1187             FROM calls c
1188             JOIN imports i ON c.caller_file = i.file AND c.callee_name = COALESCE(i.alias, i.name)
1189             WHERE i.name = ?1 AND c.callee_resolved_file IS NULL
1190               AND (i.resolved_file = ?2 OR i.resolved_file IS NULL)
1191             UNION
1192             SELECT c.caller_file, c.caller_symbol, c.line, c.access
1193             FROM calls c
1194             JOIN imports i ON c.caller_file = i.file AND c.callee_qualifier = COALESCE(i.alias, i.name)
1195             WHERE c.callee_name = ?1 AND i.module IS NULL AND c.callee_resolved_file IS NULL
1196               AND (i.resolved_file = ?2 OR i.resolved_file IS NULL)
1197             UNION
1198             SELECT c.caller_file, c.caller_symbol, c.line, c.access
1199             FROM calls c
1200             JOIN symbols s ON c.caller_file = s.file AND c.caller_symbol = s.name
1201             WHERE c.callee_name = ?1 AND c.callee_qualifier = 'self'
1202               AND s.parent IS NOT NULL AND c.callee_resolved_file IS NULL",
1203            params![method_name, def_file],
1204        ).await?;
1205        let mut callers = Vec::new();
1206        while let Some(row) = rows.next().await? {
1207            callers.push((
1208                row.get(0)?,
1209                row.get(1)?,
1210                u64::try_from(row.get::<i64>(2)?).unwrap_or(0) as usize,
1211                row.get::<Option<String>>(3)?,
1212            ));
1213        }
1214
1215        Ok(callers)
1216    }
1217
1218    /// Find callees of a symbol (what it calls)
1219    pub async fn find_callees(
1220        &self,
1221        file: &str,
1222        symbol_name: &str,
1223    ) -> Result<Vec<(String, usize, Option<String>)>, libsql::Error> {
1224        let mut rows = self
1225            .conn
1226            .query(
1227                "SELECT callee_name, line, access FROM calls WHERE caller_file = ?1 AND caller_symbol = ?2",
1228                params![file, symbol_name],
1229            )
1230            .await?;
1231        let mut callees = Vec::new();
1232        while let Some(row) = rows.next().await? {
1233            callees.push((
1234                row.get(0)?,
1235                u64::try_from(row.get::<i64>(1)?).unwrap_or(0) as usize,
1236                row.get::<Option<String>>(2)?,
1237            ));
1238        }
1239        Ok(callees)
1240    }
1241
1242    /// Find callees with their resolved definition file.
1243    ///
1244    /// Returns `(callee_name, line, Option<def_file>)` where `def_file` is the
1245    /// root-relative path of the file that defines the callee, resolved via the
1246    /// imports table's `resolved_file` column. `None` means the callee is locally
1247    /// defined, external, or could not be resolved.
1248    pub async fn find_callees_resolved(
1249        &self,
1250        file: &str,
1251        symbol_name: &str,
1252    ) -> Result<Vec<(String, usize, Option<String>)>, libsql::Error> {
1253        let mut rows = self
1254            .conn
1255            .query(
1256                "SELECT c.callee_name, c.line, i.resolved_file
1257                 FROM calls c
1258                 LEFT JOIN imports i
1259                   ON c.caller_file = i.file
1260                   AND c.callee_name = COALESCE(i.alias, i.name)
1261                 WHERE c.caller_file = ?1 AND c.caller_symbol = ?2",
1262                params![file, symbol_name],
1263            )
1264            .await?;
1265        let mut callees = Vec::new();
1266        while let Some(row) = rows.next().await? {
1267            callees.push((
1268                row.get(0)?,
1269                u64::try_from(row.get::<i64>(1)?).unwrap_or(0) as usize,
1270                row.get::<Option<String>>(2)?,
1271            ));
1272        }
1273        Ok(callees)
1274    }
1275
1276    /// Find a symbol by name
1277    pub async fn find_symbol(
1278        &self,
1279        name: &str,
1280    ) -> Result<Vec<(String, String, usize, usize)>, libsql::Error> {
1281        let mut rows = self
1282            .conn
1283            .query(
1284                "SELECT file, kind, start_line, end_line FROM symbols WHERE name = ?1",
1285                params![name],
1286            )
1287            .await?;
1288        let mut symbols = Vec::new();
1289        while let Some(row) = rows.next().await? {
1290            symbols.push((
1291                row.get(0)?,
1292                row.get(1)?,
1293                u64::try_from(row.get::<i64>(2)?).unwrap_or(0) as usize,
1294                u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1295            ));
1296        }
1297        Ok(symbols)
1298    }
1299
1300    /// Get all distinct symbol names as a HashSet.
1301    pub async fn all_symbol_names(
1302        &self,
1303    ) -> Result<std::collections::HashSet<String>, libsql::Error> {
1304        let mut rows = self
1305            .conn
1306            .query("SELECT DISTINCT name FROM symbols", ())
1307            .await?;
1308        let mut names = std::collections::HashSet::new();
1309        while let Some(row) = rows.next().await? {
1310            names.insert(row.get(0)?);
1311        }
1312        Ok(names)
1313    }
1314
1315    /// Find symbols by name with fuzzy matching, optional kind filter, and limit
1316    pub async fn find_symbols(
1317        &self,
1318        query: &str,
1319        kind: Option<&str>,
1320        fuzzy: bool,
1321        limit: usize,
1322    ) -> Result<Vec<SymbolMatch>, libsql::Error> {
1323        let query_lower = query.to_lowercase();
1324        let prefix_pattern = format!("{}%", query_lower);
1325        let limit_i64 = i64::try_from(limit).unwrap_or(i64::MAX);
1326
1327        let mut symbols = Vec::new();
1328
1329        if fuzzy {
1330            let pattern = format!("%{}%", query_lower);
1331            let mut rows = if let Some(k) = kind {
1332                self.conn
1333                    .query(
1334                        "SELECT name, kind, file, start_line, end_line, parent FROM symbols
1335                     WHERE LOWER(name) LIKE ?1 AND kind = ?2
1336                     ORDER BY
1337                       CASE WHEN LOWER(name) = ?3 THEN 0
1338                            WHEN LOWER(name) LIKE ?4 THEN 1
1339                            ELSE 2 END,
1340                       LENGTH(name), name
1341                     LIMIT ?5",
1342                        params![pattern, k, query_lower, prefix_pattern, limit_i64],
1343                    )
1344                    .await?
1345            } else {
1346                self.conn
1347                    .query(
1348                        "SELECT name, kind, file, start_line, end_line, parent FROM symbols
1349                     WHERE LOWER(name) LIKE ?1
1350                     ORDER BY
1351                       CASE WHEN LOWER(name) = ?2 THEN 0
1352                            WHEN LOWER(name) LIKE ?3 THEN 1
1353                            ELSE 2 END,
1354                       LENGTH(name), name
1355                     LIMIT ?4",
1356                        params![pattern, query_lower, prefix_pattern, limit_i64],
1357                    )
1358                    .await?
1359            };
1360
1361            while let Some(row) = rows.next().await? {
1362                symbols.push(SymbolMatch {
1363                    name: row.get(0)?,
1364                    kind: row.get(1)?,
1365                    file: row.get(2)?,
1366                    start_line: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1367                    end_line: u64::try_from(row.get::<i64>(4)?).unwrap_or(0) as usize,
1368                    parent: row.get(5)?,
1369                });
1370            }
1371        } else {
1372            // Exact match
1373            let mut rows = if let Some(k) = kind {
1374                self.conn
1375                    .query(
1376                        "SELECT name, kind, file, start_line, end_line, parent FROM symbols
1377                     WHERE LOWER(name) = LOWER(?1) AND kind = ?2
1378                     LIMIT ?3",
1379                        params![query, k, limit_i64],
1380                    )
1381                    .await?
1382            } else {
1383                self.conn
1384                    .query(
1385                        "SELECT name, kind, file, start_line, end_line, parent FROM symbols
1386                     WHERE LOWER(name) = LOWER(?1)
1387                     LIMIT ?2",
1388                        params![query, limit_i64],
1389                    )
1390                    .await?
1391            };
1392
1393            while let Some(row) = rows.next().await? {
1394                symbols.push(SymbolMatch {
1395                    name: row.get(0)?,
1396                    kind: row.get(1)?,
1397                    file: row.get(2)?,
1398                    start_line: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1399                    end_line: u64::try_from(row.get::<i64>(4)?).unwrap_or(0) as usize,
1400                    parent: row.get(5)?,
1401                });
1402            }
1403        }
1404
1405        Ok(symbols)
1406    }
1407
1408    /// Get call graph stats
1409    pub async fn call_graph_stats(&self) -> Result<CallGraphStats, libsql::Error> {
1410        let symbols = {
1411            let mut rows = self.conn.query("SELECT COUNT(*) FROM symbols", ()).await?;
1412            if let Some(row) = rows.next().await? {
1413                u64::try_from(row.get::<i64>(0)?).unwrap_or(0) as usize
1414            } else {
1415                0
1416            }
1417        };
1418        let calls = {
1419            let mut rows = self.conn.query("SELECT COUNT(*) FROM calls", ()).await?;
1420            if let Some(row) = rows.next().await? {
1421                u64::try_from(row.get::<i64>(0)?).unwrap_or(0) as usize
1422            } else {
1423                0
1424            }
1425        };
1426        let imports = {
1427            let mut rows = self.conn.query("SELECT COUNT(*) FROM imports", ()).await?;
1428            if let Some(row) = rows.next().await? {
1429                u64::try_from(row.get::<i64>(0)?).unwrap_or(0) as usize
1430            } else {
1431                0
1432            }
1433        };
1434        Ok(CallGraphStats {
1435            symbols,
1436            calls,
1437            imports,
1438        })
1439    }
1440
1441    /// Load all call edges from the calls table.
1442    /// Returns Vec<(caller_file, caller_symbol, callee_name)>.
1443    /// Used by test-gaps analysis for bulk caller lookup.
1444    pub async fn all_call_edges(&self) -> Result<Vec<(String, String, String)>, libsql::Error> {
1445        let mut rows = self
1446            .conn
1447            .query(
1448                "SELECT caller_file, caller_symbol, callee_name FROM calls",
1449                (),
1450            )
1451            .await?;
1452        let mut edges = Vec::new();
1453        while let Some(row) = rows.next().await? {
1454            edges.push((row.get(0)?, row.get(1)?, row.get(2)?));
1455        }
1456        Ok(edges)
1457    }
1458
1459    /// Load all imports from the imports table.
1460    /// Returns Vec<(file, module, name, line)>.
1461    /// Used by rules for building relations.
1462    pub async fn all_imports(&self) -> Result<Vec<(String, String, String, u32)>, libsql::Error> {
1463        let mut rows = self
1464            .conn
1465            .query("SELECT file, module, name, line FROM imports", ())
1466            .await?;
1467        let mut imports = Vec::new();
1468        while let Some(row) = rows.next().await? {
1469            // module can be NULL in some cases
1470            let module: Option<String> = row.get(1).ok();
1471            imports.push((
1472                row.get(0)?,
1473                module.unwrap_or_default(),
1474                row.get(2)?,
1475                u32::try_from(row.get::<i64>(3)?).unwrap_or(0),
1476            ));
1477        }
1478        Ok(imports)
1479    }
1480
1481    /// Load all resolved import edges from the imports table.
1482    /// Returns Vec<(importer_file, imported_file)> for rows where `resolved_file IS NOT NULL`.
1483    /// The paths are root-relative strings as stored in the database.
1484    /// Used by the daemon to build the reverse-dep graph on startup.
1485    pub async fn all_resolved_import_edges(&self) -> Result<Vec<(String, String)>, libsql::Error> {
1486        let mut rows = self
1487            .conn
1488            .query(
1489                "SELECT file, resolved_file FROM imports WHERE resolved_file IS NOT NULL",
1490                (),
1491            )
1492            .await?;
1493        let mut edges = Vec::new();
1494        while let Some(row) = rows.next().await? {
1495            edges.push((row.get(0)?, row.get(1)?));
1496        }
1497        Ok(edges)
1498    }
1499
1500    /// Load all resolved import edges with line numbers.
1501    /// Returns `Vec<(importer_file, line, resolved_file)>` for rows where
1502    /// `resolved_file IS NOT NULL`. Used by the boundary-violations native rule
1503    /// to check cross-boundary imports with precise source locations.
1504    pub async fn all_resolved_imports_with_lines(
1505        &self,
1506    ) -> Result<Vec<(String, u32, String)>, libsql::Error> {
1507        let mut rows = self
1508            .conn
1509            .query(
1510                "SELECT file, line, resolved_file FROM imports WHERE resolved_file IS NOT NULL",
1511                (),
1512            )
1513            .await?;
1514        let mut edges = Vec::new();
1515        while let Some(row) = rows.next().await? {
1516            let line = u32::try_from(row.get::<i64>(1)?).unwrap_or(0);
1517            edges.push((row.get(0)?, line, row.get(2)?));
1518        }
1519        Ok(edges)
1520    }
1521
1522    /// Count distinct resolved import targets per file (fan-out).
1523    /// Returns `Vec<(file, count)>` ordered by count descending.
1524    /// Only counts rows where `resolved_file IS NOT NULL`.
1525    /// Used by the `high-fan-out` native rule.
1526    pub async fn import_fan_out_by_file(&self) -> Result<Vec<(String, usize)>, libsql::Error> {
1527        let mut rows = self
1528            .conn
1529            .query(
1530                "SELECT file, COUNT(DISTINCT resolved_file) AS cnt \
1531                 FROM imports WHERE resolved_file IS NOT NULL \
1532                 GROUP BY file ORDER BY cnt DESC",
1533                (),
1534            )
1535            .await?;
1536        let mut result = Vec::new();
1537        while let Some(row) = rows.next().await? {
1538            let count = usize::try_from(row.get::<i64>(1)?).unwrap_or(0);
1539            result.push((row.get(0)?, count));
1540        }
1541        Ok(result)
1542    }
1543
1544    /// Count distinct files that import each file (fan-in).
1545    /// Returns `Vec<(file, count)>` ordered by count descending.
1546    /// Only counts rows where `resolved_file IS NOT NULL`.
1547    /// Used by the `high-fan-in` native rule.
1548    pub async fn import_fan_in_by_file(&self) -> Result<Vec<(String, usize)>, libsql::Error> {
1549        let mut rows = self
1550            .conn
1551            .query(
1552                "SELECT resolved_file, COUNT(DISTINCT file) AS cnt \
1553                 FROM imports WHERE resolved_file IS NOT NULL \
1554                 GROUP BY resolved_file ORDER BY cnt DESC",
1555                (),
1556            )
1557            .await?;
1558        let mut result = Vec::new();
1559        while let Some(row) = rows.next().await? {
1560            let count = usize::try_from(row.get::<i64>(1)?).unwrap_or(0);
1561            result.push((row.get(0)?, count));
1562        }
1563        Ok(result)
1564    }
1565
1566    /// Load resolved import edges for a specific importer file (root-relative path).
1567    /// Returns Vec<imported_file> where `resolved_file IS NOT NULL`.
1568    /// Used by the daemon to update outgoing edges for a changed file.
1569    pub async fn resolved_imports_for_file(
1570        &self,
1571        file: &str,
1572    ) -> Result<Vec<String>, libsql::Error> {
1573        let mut rows = self
1574            .conn
1575            .query(
1576                "SELECT resolved_file FROM imports WHERE file = ?1 AND resolved_file IS NOT NULL",
1577                params![file.to_string()],
1578            )
1579            .await?;
1580        let mut targets = Vec::new();
1581        while let Some(row) = rows.next().await? {
1582            targets.push(row.get(0)?);
1583        }
1584        Ok(targets)
1585    }
1586
1587    /// Find the shortest import path(s) from `from` to `to` via BFS over the resolved import graph.
1588    ///
1589    /// `from` and `to` are root-relative path strings (as stored in the DB).
1590    /// Returns all shortest paths (there may be more than one of equal length).
1591    /// If `all_paths` is true, returns all simple paths up to `path_limit` paths
1592    /// and up to `max_depth` hops deep.
1593    /// Returns an empty vec if no path exists.
1594    pub async fn find_import_path(
1595        &self,
1596        from: &str,
1597        to: &str,
1598        all_paths: bool,
1599        path_limit: usize,
1600        max_depth: usize,
1601    ) -> Result<Vec<Vec<String>>, libsql::Error> {
1602        use std::collections::{HashMap, HashSet, VecDeque};
1603
1604        if from == to {
1605            return Ok(vec![vec![from.to_string()]]);
1606        }
1607
1608        // Build adjacency list: file -> set of files it imports
1609        let mut adj: HashMap<String, Vec<String>> = HashMap::new();
1610        let mut rows = self
1611            .conn
1612            .query(
1613                "SELECT file, resolved_file FROM imports WHERE resolved_file IS NOT NULL",
1614                (),
1615            )
1616            .await?;
1617        while let Some(row) = rows.next().await? {
1618            let file: String = row.get(0)?;
1619            let resolved: String = row.get(1)?;
1620            adj.entry(file).or_default().push(resolved);
1621        }
1622
1623        if !all_paths {
1624            // BFS for shortest path
1625            let mut visited: HashMap<String, String> = HashMap::new(); // node -> parent
1626            let mut queue: VecDeque<String> = VecDeque::new();
1627            queue.push_back(from.to_string());
1628            visited.insert(from.to_string(), String::new());
1629
1630            let mut found = false;
1631            'bfs: while let Some(node) = queue.pop_front() {
1632                // Check depth
1633                let depth = {
1634                    let mut d = 0usize;
1635                    let mut cur = &node;
1636                    while let Some(p) = visited.get(cur) {
1637                        if p.is_empty() {
1638                            break;
1639                        }
1640                        d += 1;
1641                        cur = p;
1642                        if d > max_depth {
1643                            break;
1644                        }
1645                    }
1646                    d
1647                };
1648                if depth >= max_depth {
1649                    continue;
1650                }
1651                if let Some(neighbors) = adj.get(&node) {
1652                    for neighbor in neighbors {
1653                        if !visited.contains_key(neighbor.as_str()) {
1654                            visited.insert(neighbor.clone(), node.clone());
1655                            if neighbor == to {
1656                                found = true;
1657                                break 'bfs;
1658                            }
1659                            queue.push_back(neighbor.clone());
1660                        }
1661                    }
1662                }
1663            }
1664
1665            if !found {
1666                return Ok(vec![]);
1667            }
1668
1669            // Reconstruct path by backtracking through visited
1670            let mut path = vec![to.to_string()];
1671            let mut cur = to.to_string();
1672            loop {
1673                let parent = visited.get(&cur).cloned().unwrap_or_default();
1674                if parent.is_empty() {
1675                    break;
1676                }
1677                path.push(parent.clone());
1678                cur = parent;
1679            }
1680            path.reverse();
1681            Ok(vec![path])
1682        } else {
1683            // DFS to find all simple paths up to path_limit
1684            let mut result: Vec<Vec<String>> = Vec::new();
1685            let mut stack: VecDeque<(String, Vec<String>, HashSet<String>)> = VecDeque::new();
1686            let mut initial_visited = HashSet::new();
1687            initial_visited.insert(from.to_string());
1688            stack.push_back((from.to_string(), vec![from.to_string()], initial_visited));
1689
1690            while let Some((node, path, visited)) = stack.pop_back() {
1691                if result.len() >= path_limit {
1692                    break;
1693                }
1694                if path.len() > max_depth + 1 {
1695                    continue;
1696                }
1697                if let Some(neighbors) = adj.get(&node) {
1698                    for neighbor in neighbors {
1699                        if visited.contains(neighbor.as_str()) {
1700                            continue;
1701                        }
1702                        let mut new_path = path.clone();
1703                        new_path.push(neighbor.clone());
1704                        if neighbor == to {
1705                            result.push(new_path);
1706                            if result.len() >= path_limit {
1707                                break;
1708                            }
1709                        } else {
1710                            let mut new_visited = visited.clone();
1711                            new_visited.insert(neighbor.clone());
1712                            stack.push_back((neighbor.clone(), new_path, new_visited));
1713                        }
1714                    }
1715                }
1716            }
1717
1718            Ok(result)
1719        }
1720    }
1721
1722    /// Load all symbol implements from the symbol_implements table.
1723    /// Returns Vec<(file, name, interface)>.
1724    pub async fn all_symbol_implements(
1725        &self,
1726    ) -> Result<Vec<(String, String, String)>, libsql::Error> {
1727        let mut rows = self
1728            .conn
1729            .query("SELECT file, name, interface FROM symbol_implements", ())
1730            .await?;
1731        let mut implements = Vec::new();
1732        while let Some(row) = rows.next().await? {
1733            implements.push((row.get(0)?, row.get(1)?, row.get(2)?));
1734        }
1735        Ok(implements)
1736    }
1737
1738    /// Load all type methods from the type_methods table.
1739    /// Returns Vec<(file, type_name, method_name)>.
1740    pub async fn all_type_methods(&self) -> Result<Vec<(String, String, String)>, libsql::Error> {
1741        let mut rows = self
1742            .conn
1743            .query("SELECT file, type_name, method_name FROM type_methods", ())
1744            .await?;
1745        let mut methods = Vec::new();
1746        while let Some(row) = rows.next().await? {
1747            methods.push((row.get(0)?, row.get(1)?, row.get(2)?));
1748        }
1749        Ok(methods)
1750    }
1751
1752    /// Load all calls with line numbers.
1753    /// Returns Vec<(caller_file, caller_symbol, callee_name, line)>.
1754    /// Used by rules for building relations.
1755    pub async fn all_calls_with_lines(
1756        &self,
1757    ) -> Result<Vec<(String, String, String, u32)>, libsql::Error> {
1758        let mut rows = self
1759            .conn
1760            .query(
1761                "SELECT caller_file, caller_symbol, callee_name, line FROM calls",
1762                (),
1763            )
1764            .await?;
1765        let mut calls = Vec::new();
1766        while let Some(row) = rows.next().await? {
1767            calls.push((
1768                row.get(0)?,
1769                row.get(1)?,
1770                row.get(2)?,
1771                u32::try_from(row.get::<i64>(3)?).unwrap_or(0),
1772            ));
1773        }
1774        Ok(calls)
1775    }
1776
1777    /// Load all symbols from the symbols table with full details.
1778    /// Returns Vec<(file, name, kind, start_line, end_line, parent, visibility, is_impl)>.
1779    /// Used by test-gaps analysis to classify test context.
1780    pub async fn all_symbols_with_details(
1781        &self,
1782    ) -> Result<
1783        Vec<(
1784            String,
1785            String,
1786            String,
1787            usize,
1788            usize,
1789            Option<String>,
1790            String,
1791            bool,
1792        )>,
1793        libsql::Error,
1794    > {
1795        let mut rows = self
1796            .conn
1797            .query(
1798                "SELECT file, name, kind, start_line, end_line, parent, visibility, is_impl FROM symbols",
1799                (),
1800            )
1801            .await?;
1802        let mut symbols = Vec::new();
1803        while let Some(row) = rows.next().await? {
1804            symbols.push((
1805                row.get(0)?,
1806                row.get(1)?,
1807                row.get(2)?,
1808                u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
1809                u64::try_from(row.get::<i64>(4)?).unwrap_or(0) as usize,
1810                row.get(5).ok(),
1811                row.get::<String>(6)
1812                    .unwrap_or_else(|_| "public".to_string()),
1813                row.get::<i64>(7).unwrap_or(0) != 0,
1814            ));
1815        }
1816        Ok(symbols)
1817    }
1818
1819    /// Load all symbol attributes from the symbol_attributes table.
1820    /// Returns Vec<(file, name, attribute)>.
1821    pub async fn all_symbol_attributes(
1822        &self,
1823    ) -> Result<Vec<(String, String, String)>, libsql::Error> {
1824        let mut rows = self
1825            .conn
1826            .query("SELECT file, name, attribute FROM symbol_attributes", ())
1827            .await?;
1828        let mut attrs = Vec::new();
1829        while let Some(row) = rows.next().await? {
1830            attrs.push((row.get(0)?, row.get(1)?, row.get(2)?));
1831        }
1832        Ok(attrs)
1833    }
1834
1835    /// Load all calls with qualifiers.
1836    /// Returns Vec<(caller_file, caller_symbol, callee_name, callee_qualifier, line)>.
1837    pub async fn all_calls_with_qualifiers(
1838        &self,
1839    ) -> Result<Vec<(String, String, String, Option<String>, u32)>, libsql::Error> {
1840        let mut rows = self
1841            .conn
1842            .query(
1843                "SELECT caller_file, caller_symbol, callee_name, callee_qualifier, line FROM calls",
1844                (),
1845            )
1846            .await?;
1847        let mut calls = Vec::new();
1848        while let Some(row) = rows.next().await? {
1849            calls.push((
1850                row.get(0)?,
1851                row.get(1)?,
1852                row.get(2)?,
1853                row.get(3).ok(),
1854                u32::try_from(row.get::<i64>(4)?).unwrap_or(0),
1855            ));
1856        }
1857        Ok(calls)
1858    }
1859
1860    /// Convert a module name to possible file paths using the language's trait method.
1861    /// Returns only paths that exist in the index.
1862    pub async fn module_to_files(&self, module: &str, source_file: &str) -> Vec<String> {
1863        // Get language from the source file extension
1864        let lang = match support_for_path(Path::new(source_file)) {
1865            Some(l) => l,
1866            None => return vec![],
1867        };
1868
1869        // Get local deps implementation for this language
1870        let deps = match normalize_local_deps::registry::deps_for_language(lang.name()) {
1871            Some(d) => d,
1872            None => return vec![],
1873        };
1874
1875        // First try resolve_local_import which handles crate::, super::, self:: properly
1876        let source_path = self.root.join(source_file);
1877        if let Some(resolved) = deps.resolve_local_import(module, &source_path, &self.root) {
1878            // Convert absolute path back to relative path for index lookup
1879            if let Ok(rel_path) = resolved.strip_prefix(&self.root) {
1880                let rel_str = rel_path.to_string_lossy().to_string();
1881                // Verify it exists in index
1882                if let Ok(mut rows) = self
1883                    .conn
1884                    .query(
1885                        "SELECT 1 FROM files WHERE path = ?1",
1886                        params![rel_str.clone()],
1887                    )
1888                    .await
1889                    && rows.next().await.ok().flatten().is_some()
1890                {
1891                    return vec![rel_str];
1892                }
1893            }
1894        }
1895
1896        // Fall back to module_name_to_paths for simpler lookups
1897        let candidates = deps.module_name_to_paths(module);
1898
1899        // Filter to files that exist in index
1900        let mut result = Vec::new();
1901        for path in candidates {
1902            let mut rows = match self
1903                .conn
1904                .query("SELECT 1 FROM files WHERE path = ?1", params![path.clone()])
1905                .await
1906            {
1907                Ok(r) => r,
1908                Err(_) => continue,
1909            };
1910            if rows.next().await.ok().flatten().is_some() {
1911                result.push(path);
1912            }
1913        }
1914        result
1915    }
1916
1917    /// Resolve all unresolved import rows by populating `resolved_file`.
1918    ///
1919    /// For each import row where `module IS NOT NULL` and `resolved_file IS NULL`,
1920    /// calls `module_to_files()` to convert the module specifier to a project-relative
1921    /// file path and writes it back. Rows that cannot be resolved (external packages,
1922    /// stdlib, unknown modules) keep `resolved_file = NULL`.
1923    ///
1924    /// Safe to call multiple times — only processes rows with `resolved_file IS NULL`.
1925    pub async fn resolve_all_imports(&self) -> Result<usize, libsql::Error> {
1926        // Collect distinct (file, module) pairs that still need resolution.
1927        // We can't mutate while iterating, so collect first.
1928        let mut rows = self
1929            .conn
1930            .query(
1931                "SELECT DISTINCT file, module FROM imports WHERE module IS NOT NULL AND resolved_file IS NULL",
1932                (),
1933            )
1934            .await?;
1935        let mut pending: Vec<(String, String)> = Vec::new();
1936        while let Some(row) = rows.next().await? {
1937            pending.push((row.get(0)?, row.get(1)?));
1938        }
1939
1940        let mut resolved_count = 0;
1941        for (file, module) in pending {
1942            let files = self.module_to_files(&module, &file).await;
1943            if let Some(resolved_file) = files.first() {
1944                self.conn
1945                    .execute(
1946                        "UPDATE imports SET resolved_file = ?1 WHERE file = ?2 AND module = ?3 AND resolved_file IS NULL",
1947                        params![resolved_file.clone(), file.clone(), module.clone()],
1948                    )
1949                    .await?;
1950                resolved_count += 1;
1951            }
1952        }
1953        Ok(resolved_count)
1954    }
1955
1956    /// Follow re-export chains to resolve imports to their ultimate source file.
1957    ///
1958    /// When file A imports `Foo` from file B, but file B re-exports `Foo` from file C
1959    /// (via `pub use c::Foo` in Rust or `export { Foo } from './c'` in TypeScript),
1960    /// this updates A's import row so `resolved_file` points to C instead of B.
1961    ///
1962    /// Runs iteratively (up to `max_depth` passes) to handle chains longer than one hop,
1963    /// stopping early when no rows are updated. Wildcard re-exports (`pub use mod::*`)
1964    /// are handled by following any re-export from the intermediate file.
1965    pub async fn trace_reexports(&self) -> Result<usize, libsql::Error> {
1966        let max_depth = 10usize;
1967        let mut total_updated = 0usize;
1968
1969        for _ in 0..max_depth {
1970            // For each import row whose resolved_file re-exports the imported name
1971            // (or re-exports via wildcard), update resolved_file to point to the
1972            // re-export's own resolved_file (the ultimate source).
1973            //
1974            // A re-export in file B for name N means: imports row where
1975            //   file = B, name = N (or name = '*'), is_reexport = 1, resolved_file IS NOT NULL
1976            //
1977            // We look for imports in A where:
1978            //   resolved_file = B  AND  B has a matching re-export row with its own resolved_file
1979            let updated = self
1980                .conn
1981                .execute(
1982                    "UPDATE imports AS consumer
1983                     SET resolved_file = (
1984                         SELECT reexp.resolved_file
1985                         FROM imports AS reexp
1986                         WHERE reexp.file = consumer.resolved_file
1987                           AND reexp.is_reexport = 1
1988                           AND reexp.resolved_file IS NOT NULL
1989                           AND reexp.resolved_file != consumer.resolved_file
1990                           AND (
1991                               reexp.name = consumer.name
1992                               OR COALESCE(reexp.alias, reexp.name) = consumer.name
1993                               OR reexp.name = '*'
1994                           )
1995                         LIMIT 1
1996                     )
1997                     WHERE consumer.resolved_file IS NOT NULL
1998                       AND EXISTS (
1999                           SELECT 1 FROM imports AS reexp2
2000                           WHERE reexp2.file = consumer.resolved_file
2001                             AND reexp2.is_reexport = 1
2002                             AND reexp2.resolved_file IS NOT NULL
2003                             AND reexp2.resolved_file != consumer.resolved_file
2004                             AND (
2005                                 reexp2.name = consumer.name
2006                                 OR COALESCE(reexp2.alias, reexp2.name) = consumer.name
2007                                 OR reexp2.name = '*'
2008                             )
2009                       )",
2010                    (),
2011                )
2012                .await? as usize;
2013
2014            total_updated += updated;
2015            if updated == 0 {
2016                break;
2017            }
2018        }
2019
2020        Ok(total_updated)
2021    }
2022
2023    /// Resolve call targets: for each call, try to determine which file defines the callee.
2024    ///
2025    /// Uses the import graph: if caller_file imports a name that matches callee_name (or its alias),
2026    /// and that import has a resolved_file, set callee_resolved_file on the call row.
2027    /// Same-file calls (caller_file has a symbol matching callee_name) also get resolved.
2028    pub async fn resolve_all_calls(&self) -> Result<usize, libsql::Error> {
2029        let mut resolved = 0usize;
2030
2031        // 1. Same-file calls: callee defined in the same file as the caller
2032        resolved += self
2033            .conn
2034            .execute(
2035                "UPDATE calls SET callee_resolved_file = caller_file
2036                 WHERE callee_resolved_file IS NULL
2037                   AND callee_qualifier IS NULL
2038                   AND EXISTS (
2039                       SELECT 1 FROM symbols
2040                       WHERE symbols.file = calls.caller_file
2041                         AND symbols.name = calls.callee_name
2042                   )",
2043                (),
2044            )
2045            .await? as usize;
2046
2047        // 2. Import-resolved calls: callee_name matches an import name (or alias)
2048        //    that has a resolved_file
2049        resolved += self
2050            .conn
2051            .execute(
2052                "UPDATE calls SET callee_resolved_file = (
2053                     SELECT i.resolved_file FROM imports i
2054                     WHERE i.file = calls.caller_file
2055                       AND calls.callee_name = COALESCE(i.alias, i.name)
2056                       AND i.resolved_file IS NOT NULL
2057                     LIMIT 1
2058                 )
2059                 WHERE callee_resolved_file IS NULL
2060                   AND callee_qualifier IS NULL
2061                   AND EXISTS (
2062                       SELECT 1 FROM imports i
2063                       WHERE i.file = calls.caller_file
2064                         AND calls.callee_name = COALESCE(i.alias, i.name)
2065                         AND i.resolved_file IS NOT NULL
2066                   )",
2067                (),
2068            )
2069            .await? as usize;
2070
2071        // 3. Qualifier-resolved calls: callee_qualifier matches an import name (or alias)
2072        //    e.g., `module.foo()` where `module` is imported
2073        resolved += self
2074            .conn
2075            .execute(
2076                "UPDATE calls SET callee_resolved_file = (
2077                     SELECT i.resolved_file FROM imports i
2078                     WHERE i.file = calls.caller_file
2079                       AND calls.callee_qualifier = COALESCE(i.alias, i.name)
2080                       AND i.resolved_file IS NOT NULL
2081                     LIMIT 1
2082                 )
2083                 WHERE callee_resolved_file IS NULL
2084                   AND callee_qualifier IS NOT NULL
2085                   AND callee_qualifier != 'self'
2086                   AND EXISTS (
2087                       SELECT 1 FROM imports i
2088                       WHERE i.file = calls.caller_file
2089                         AND calls.callee_qualifier = COALESCE(i.alias, i.name)
2090                         AND i.resolved_file IS NOT NULL
2091                   )",
2092                (),
2093            )
2094            .await? as usize;
2095
2096        // 4. Self-calls: `self.method()` — resolve to the file containing the parent type
2097        //    The caller's parent type is in the same file, so resolve to caller_file.
2098        resolved += self
2099            .conn
2100            .execute(
2101                "UPDATE calls SET callee_resolved_file = caller_file
2102                 WHERE callee_resolved_file IS NULL
2103                   AND callee_qualifier = 'self'",
2104                (),
2105            )
2106            .await? as usize;
2107
2108        Ok(resolved)
2109    }
2110
2111    /// Check if a file exports (defines) a given symbol
2112    async fn file_exports_symbol(&self, file: &str, symbol: &str) -> Result<bool, libsql::Error> {
2113        // Check if symbol is defined in this file (top-level only, parent IS NULL)
2114        let mut rows = self
2115            .conn
2116            .query(
2117                "SELECT COUNT(*) FROM symbols WHERE file = ?1 AND name = ?2 AND parent IS NULL",
2118                params![file, symbol],
2119            )
2120            .await?;
2121        if let Some(row) = rows.next().await? {
2122            let count: i64 = row.get(0)?;
2123            Ok(count > 0)
2124        } else {
2125            Ok(false)
2126        }
2127    }
2128
2129    /// Resolve a name in a file's context to its source module
2130    /// Returns: (source_module, original_name) if found
2131    pub async fn resolve_import(
2132        &self,
2133        file: &str,
2134        name: &str,
2135    ) -> Result<Option<(String, String)>, libsql::Error> {
2136        // Check for direct import or alias
2137        let mut rows = self
2138            .conn
2139            .query(
2140                "SELECT module, name FROM imports WHERE file = ?1 AND (name = ?2 OR alias = ?2)",
2141                params![file, name],
2142            )
2143            .await?;
2144
2145        if let Some(row) = rows.next().await? {
2146            let module: Option<String> = row.get(0)?;
2147            let orig_name: String = row.get(1)?;
2148            if let Some(module) = module {
2149                return Ok(Some((module, orig_name)));
2150            } else {
2151                // Plain import (import X), module is the name
2152                return Ok(Some((orig_name.clone(), orig_name)));
2153            }
2154        }
2155
2156        // Check for wildcard imports - name could come from any of them
2157        let mut rows = self
2158            .conn
2159            .query(
2160                "SELECT module FROM imports WHERE file = ?1 AND name = '*'",
2161                params![file],
2162            )
2163            .await?;
2164        let mut wildcards = Vec::new();
2165        while let Some(row) = rows.next().await? {
2166            if let Ok(Some(module)) = row.get::<Option<String>>(0) {
2167                wildcards.push(module);
2168            }
2169        }
2170
2171        // Check each wildcard source to see if it exports the symbol
2172        for module in &wildcards {
2173            let files = self.module_to_files(module, file).await;
2174            for module_file in files {
2175                if self.file_exports_symbol(&module_file, name).await? {
2176                    return Ok(Some((module.clone(), name.to_string())));
2177                }
2178            }
2179        }
2180
2181        // Fallback: if we have wildcards but couldn't verify, return first as possibility
2182        // This handles external modules (stdlib, third-party) we can't resolve
2183        if !wildcards.is_empty() {
2184            return Ok(Some((wildcards[0].clone(), name.to_string())));
2185        }
2186
2187        Ok(None)
2188    }
2189
2190    /// Find which files import a given module
2191    pub async fn find_importers(
2192        &self,
2193        module: &str,
2194    ) -> Result<Vec<(String, String, usize)>, libsql::Error> {
2195        let pattern = format!("{}%", module);
2196        let mut rows = self
2197            .conn
2198            .query(
2199                "SELECT file, name, line FROM imports WHERE module = ?1 OR module LIKE ?2",
2200                params![module, pattern],
2201            )
2202            .await?;
2203        let mut importers = Vec::new();
2204        while let Some(row) = rows.next().await? {
2205            importers.push((
2206                row.get(0)?,
2207                row.get(1)?,
2208                u64::try_from(row.get::<i64>(2)?).unwrap_or(0) as usize,
2209            ));
2210        }
2211        Ok(importers)
2212    }
2213
2214    /// Check whether a file already has an import named `name` (as `name` or `alias`).
2215    /// Used for rename conflict detection.
2216    pub async fn has_import_named(&self, file: &str, name: &str) -> Result<bool, libsql::Error> {
2217        let mut rows = self
2218            .conn
2219            .query(
2220                "SELECT COUNT(*) FROM imports WHERE file = ?1 AND (name = ?2 OR alias = ?2)",
2221                params![file, name],
2222            )
2223            .await?;
2224        if let Some(row) = rows.next().await? {
2225            let count: i64 = row.get(0)?;
2226            Ok(count > 0)
2227        } else {
2228            Ok(false)
2229        }
2230    }
2231
2232    /// Find files that import a specific symbol by name.
2233    /// Returns: (file, imported_name, alias, line)
2234    /// Useful for rename: find all files that need their import statement updated.
2235    pub async fn find_symbol_importers(
2236        &self,
2237        symbol_name: &str,
2238    ) -> Result<Vec<(String, String, Option<String>, usize)>, libsql::Error> {
2239        let mut rows = self
2240            .conn
2241            .query(
2242                "SELECT file, name, alias, line FROM imports WHERE name = ?1",
2243                params![symbol_name],
2244            )
2245            .await?;
2246        let mut importers = Vec::new();
2247        while let Some(row) = rows.next().await? {
2248            importers.push((
2249                row.get(0)?,
2250                row.get(1)?,
2251                row.get(2)?,
2252                u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
2253            ));
2254        }
2255        Ok(importers)
2256    }
2257
2258    /// Find files that import a specific symbol by name, including the module path.
2259    /// Returns: (file, imported_name, alias, line, module)
2260    /// Useful for `move`: the recipe needs the original module string so it can rewrite
2261    /// it to the new path verbatim, rather than guessing where the path begins/ends.
2262    pub async fn find_symbol_importers_with_module(
2263        &self,
2264        symbol_name: &str,
2265    ) -> Result<Vec<(String, String, Option<String>, usize, Option<String>)>, libsql::Error> {
2266        let mut rows = self
2267            .conn
2268            .query(
2269                "SELECT file, name, alias, line, module FROM imports WHERE name = ?1",
2270                params![symbol_name],
2271            )
2272            .await?;
2273        let mut importers = Vec::new();
2274        while let Some(row) = rows.next().await? {
2275            importers.push((
2276                row.get(0)?,
2277                row.get(1)?,
2278                row.get(2)?,
2279                u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
2280                row.get(4)?,
2281            ));
2282        }
2283        Ok(importers)
2284    }
2285
2286    /// Get method names for a type (interface/class) in a specific file.
2287    /// Used for cross-file interface implementation detection.
2288    pub async fn get_type_methods(
2289        &self,
2290        file: &str,
2291        type_name: &str,
2292    ) -> Result<Vec<String>, libsql::Error> {
2293        let mut rows = self
2294            .conn
2295            .query(
2296                "SELECT method_name FROM type_methods WHERE file = ?1 AND type_name = ?2",
2297                params![file, type_name],
2298            )
2299            .await?;
2300        let mut methods = Vec::new();
2301        while let Some(row) = rows.next().await? {
2302            methods.push(row.get(0)?);
2303        }
2304        Ok(methods)
2305    }
2306
2307    /// Find files that define a type by name.
2308    /// Returns all files that have a type (interface/class) with the given name.
2309    pub async fn find_type_definitions(
2310        &self,
2311        type_name: &str,
2312    ) -> Result<Vec<String>, libsql::Error> {
2313        let mut rows = self
2314            .conn
2315            .query(
2316                "SELECT DISTINCT file FROM type_methods WHERE type_name = ?1",
2317                params![type_name],
2318            )
2319            .await?;
2320        let mut files = Vec::new();
2321        while let Some(row) = rows.next().await? {
2322            files.push(row.get(0)?);
2323        }
2324        Ok(files)
2325    }
2326
2327    /// Refresh the call graph by parsing all supported source files
2328    /// This is more expensive than file refresh since it parses every file
2329    /// Uses parallel processing for parsing, sequential insertion for SQLite
2330    pub async fn refresh_call_graph(&mut self) -> Result<CallGraphStats, libsql::Error> {
2331        // Get all indexed source files
2332        let files: Vec<String> = {
2333            let sql = format!(
2334                "SELECT path FROM files WHERE is_dir = 0 AND ({})",
2335                source_extensions_sql_filter()
2336            );
2337            let mut rows = self.conn.query(&sql, ()).await?;
2338            let mut files = Vec::new();
2339            while let Some(row) = rows.next().await? {
2340                let path: String = row.get(0)?;
2341                files.push(path);
2342            }
2343            files
2344        };
2345
2346        // Parse all files in parallel
2347        // Each thread gets its own SymbolParser (tree-sitter parsers have mutable state)
2348        let root = self.root.clone();
2349
2350        // Pre-pass: check CA cache for all files (serial, fast disk reads)
2351        let mut cached_data: Vec<ParsedFileData> = Vec::new();
2352        let mut uncached_files: Vec<String> = Vec::new();
2353
2354        for file_path in &files {
2355            let full_path = root.join(file_path);
2356            let bytes = match std::fs::read(&full_path) {
2357                Ok(b) => b,
2358                Err(_) => {
2359                    uncached_files.push(file_path.clone());
2360                    continue;
2361                }
2362            };
2363            let grammar = match support_for_path(&full_path) {
2364                Some(s) => s.grammar_name().to_string(),
2365                None => {
2366                    uncached_files.push(file_path.clone());
2367                    continue;
2368                }
2369            };
2370            let hash = blake3::hash(&bytes);
2371            if let Some(ca) = &self.ca_cache {
2372                match ca.get::<CachedFileData>(hash.as_bytes(), EXTRACTOR_VERSION, &grammar) {
2373                    Ok(Some(cached)) => {
2374                        cached_data.push(ParsedFileData {
2375                            file_path: file_path.clone(),
2376                            symbols: cached.symbols,
2377                            calls: cached.calls,
2378                            imports: cached.imports,
2379                            type_methods: cached.type_methods,
2380                            type_refs: cached.type_refs,
2381                        });
2382                        continue;
2383                    }
2384                    Ok(None) => {}
2385                    Err(e) => {
2386                        tracing::warn!("normalize-facts: CA cache get error: {}", e);
2387                    }
2388                }
2389            }
2390            uncached_files.push(file_path.clone());
2391        }
2392
2393        let ca_cache_for_rayon = self.ca_cache.clone();
2394
2395        let pb = if self.progress && std::io::IsTerminal::is_terminal(&std::io::stderr()) {
2396            let pb = ProgressBar::new(uncached_files.len() as u64);
2397            pb.set_style(
2398                ProgressStyle::with_template(
2399                    "{spinner:.cyan} Parsing symbols... [{bar:30.cyan/dim}] {pos}/{len} files [{elapsed_precise}]",
2400                )
2401                .unwrap_or_else(|_| ProgressStyle::default_bar())
2402                .progress_chars("##-"),
2403            );
2404            pb
2405        } else {
2406            ProgressBar::hidden()
2407        };
2408        let mut parsed_data: Vec<ParsedFileData> = uncached_files
2409            .par_iter()
2410            .progress_with(pb.clone())
2411            .filter_map(|file_path| {
2412                let full_path = root.join(file_path);
2413                let bytes = std::fs::read(&full_path).ok()?;
2414                let content = String::from_utf8_lossy(&bytes).into_owned();
2415
2416                let grammar = support_for_path(&full_path)
2417                    .map(|s| s.grammar_name().to_string())
2418                    .unwrap_or_default();
2419                let hash = blake3::hash(&bytes);
2420
2421                // Each thread creates its own parser
2422                let mut parser = SymbolParser::new();
2423
2424                // parse_file returns None when the grammar .so is unavailable.
2425                // In that case, skip the file entirely — don't index it as empty.
2426                // The missing grammar is already recorded in `parsers::report_missing_grammar`
2427                // (called from `parse_file` -> `try_get_grammar`), so callers can summarise.
2428                let symbols = parser.parse_file(&full_path, &content)?;
2429
2430                let mut sym_data = Vec::with_capacity(symbols.len());
2431                let mut call_data = Vec::new();
2432
2433                for sym in &symbols {
2434                    sym_data.push(ParsedSymbol {
2435                        name: sym.name.clone(),
2436                        kind: sym.kind.as_str().to_string(),
2437                        start_line: sym.start_line,
2438                        end_line: sym.end_line,
2439                        parent: sym.parent.clone(),
2440                        visibility: sym.visibility.as_str().to_string(),
2441                        attributes: sym.attributes.clone(),
2442                        is_interface_impl: sym.is_interface_impl,
2443                        implements: sym.implements.clone(),
2444                        docstring: sym.docstring.clone(),
2445                    });
2446
2447                    // Only index calls for functions/methods
2448                    let kind = sym.kind.as_str();
2449                    if kind == "function" || kind == "method" {
2450                        let calls = parser.find_callees_for_symbol(&full_path, &content, sym);
2451                        for (callee_name, line, qualifier, access) in calls {
2452                            call_data.push((
2453                                sym.name.clone(),
2454                                callee_name,
2455                                qualifier,
2456                                access,
2457                                line,
2458                            ));
2459                        }
2460                    }
2461                }
2462
2463                // Parse imports using trait-based extraction (works for all supported languages)
2464                let imports = parser.parse_imports(&full_path, &content);
2465
2466                // Extract type methods for cross-file interface resolution
2467                // We need to use the full symbol extraction to get hierarchy
2468                let extractor = crate::extract::Extractor::new();
2469                let extract_result = extractor.extract(&full_path, &content);
2470                let mut type_methods = Vec::new();
2471                for sym in &extract_result.symbols {
2472                    if matches!(
2473                        sym.kind,
2474                        normalize_languages::SymbolKind::Interface
2475                            | normalize_languages::SymbolKind::Class
2476                            | normalize_languages::SymbolKind::Trait
2477                            | normalize_languages::SymbolKind::Struct
2478                    ) {
2479                        for child in &sym.children {
2480                            if matches!(
2481                                child.kind,
2482                                normalize_languages::SymbolKind::Method
2483                                    | normalize_languages::SymbolKind::Function
2484                            ) {
2485                                type_methods.push((sym.name.clone(), child.name.clone()));
2486                            }
2487                        }
2488                    }
2489                }
2490
2491                // Extract type references using tree-sitter queries
2492                let type_refs = parser.find_type_refs(&full_path, &content);
2493
2494                // Store result in CA cache (best-effort).
2495                // Grammar availability is already guaranteed above (parse_file returned Some),
2496                // so empty results here are legitimate and safe to cache.
2497                if !grammar.is_empty()
2498                    && let Some(ca) = &ca_cache_for_rayon
2499                {
2500                    let cached = CachedFileData {
2501                        symbols: sym_data
2502                            .iter()
2503                            .map(|s| ParsedSymbol {
2504                                name: s.name.clone(),
2505                                kind: s.kind.clone(),
2506                                start_line: s.start_line,
2507                                end_line: s.end_line,
2508                                parent: s.parent.clone(),
2509                                visibility: s.visibility.clone(),
2510                                attributes: s.attributes.clone(),
2511                                is_interface_impl: s.is_interface_impl,
2512                                implements: s.implements.clone(),
2513                                docstring: s.docstring.clone(),
2514                            })
2515                            .collect(),
2516                        calls: call_data.clone(),
2517                        imports: imports.clone(),
2518                        type_methods: type_methods.clone(),
2519                        type_refs: type_refs.clone(),
2520                    };
2521                    if let Err(e) = ca.put(hash.as_bytes(), EXTRACTOR_VERSION, &grammar, &cached) {
2522                        tracing::warn!("normalize-facts: CA cache put error: {}", e);
2523                    }
2524                }
2525
2526                Some(ParsedFileData {
2527                    file_path: file_path.clone(),
2528                    symbols: sym_data,
2529                    calls: call_data,
2530                    imports,
2531                    type_methods,
2532                    type_refs,
2533                })
2534            })
2535            .collect();
2536
2537        // Merge CA-cached results
2538        parsed_data.extend(cached_data);
2539
2540        pb.finish_and_clear();
2541
2542        let pb_insert = if self.progress && std::io::IsTerminal::is_terminal(&std::io::stderr()) {
2543            let pb = ProgressBar::new(parsed_data.len() as u64);
2544            pb.set_style(
2545                ProgressStyle::with_template(
2546                    "{spinner:.cyan} Storing index... [{bar:30.cyan/dim}] {pos}/{len} files [{elapsed_precise}]",
2547                )
2548                .unwrap_or_else(|_| ProgressStyle::default_bar())
2549                .progress_chars("##-"),
2550            );
2551            pb
2552        } else {
2553            ProgressBar::hidden()
2554        };
2555
2556        self.conn.execute("BEGIN", ()).await?;
2557
2558        // Clear existing data
2559        self.conn.execute("DELETE FROM symbols", ()).await?;
2560        self.conn.execute("DELETE FROM calls", ()).await?;
2561        self.conn.execute("DELETE FROM imports", ()).await?;
2562        self.conn.execute("DELETE FROM type_methods", ()).await?;
2563        self.conn.execute("DELETE FROM type_refs", ()).await?;
2564        self.conn
2565            .execute("DELETE FROM symbol_attributes", ())
2566            .await?;
2567        self.conn
2568            .execute("DELETE FROM symbol_implements", ())
2569            .await?;
2570
2571        let mut symbol_count = 0;
2572        let mut call_count = 0;
2573        let mut import_count = 0;
2574
2575        for data in &parsed_data {
2576            for sym in &data.symbols {
2577                self.conn.execute(
2578                    "INSERT INTO symbols (file, name, kind, start_line, end_line, parent, visibility, is_impl) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
2579                    params![data.file_path.clone(), sym.name.clone(), sym.kind.clone(), sym.start_line as i64, sym.end_line as i64, sym.parent.clone(), sym.visibility.clone(), sym.is_interface_impl as i64],
2580                ).await?;
2581                for attr in &sym.attributes {
2582                    self.conn.execute(
2583                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
2584                        params![data.file_path.clone(), sym.name.clone(), attr.clone()],
2585                    ).await?;
2586                }
2587                if let Some(doc) = &sym.docstring {
2588                    self.conn.execute(
2589                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
2590                        params![data.file_path.clone(), sym.name.clone(), format!("doc:{doc}")],
2591                    ).await?;
2592                }
2593                for iface in &sym.implements {
2594                    self.conn.execute(
2595                        "INSERT INTO symbol_implements (file, name, interface) VALUES (?1, ?2, ?3)",
2596                        params![data.file_path.clone(), sym.name.clone(), iface.clone()],
2597                    ).await?;
2598                }
2599                symbol_count += 1;
2600            }
2601
2602            for (caller_symbol, callee_name, qualifier, access, line) in &data.calls {
2603                self.conn.execute(
2604                    "INSERT INTO calls (caller_file, caller_symbol, callee_name, callee_qualifier, access, line) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
2605                    params![data.file_path.clone(), caller_symbol.clone(), callee_name.clone(), qualifier.clone(), access.clone(), *line as i64],
2606                ).await?;
2607                call_count += 1;
2608            }
2609
2610            for imp in &data.imports {
2611                self.conn.execute(
2612                    "INSERT INTO imports (file, module, name, alias, line, is_reexport) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
2613                    params![data.file_path.clone(), imp.module.clone(), imp.name.clone(), imp.alias.clone(), imp.line as i64, imp.is_reexport as i64],
2614                ).await?;
2615                import_count += 1;
2616            }
2617
2618            for (type_name, method_name) in &data.type_methods {
2619                self.conn.execute(
2620                    "INSERT OR IGNORE INTO type_methods (file, type_name, method_name) VALUES (?1, ?2, ?3)",
2621                    params![data.file_path.clone(), type_name.clone(), method_name.clone()],
2622                ).await?;
2623            }
2624
2625            for tr in &data.type_refs {
2626                self.conn.execute(
2627                    "INSERT INTO type_refs (file, source_symbol, target_type, kind, line) VALUES (?1, ?2, ?3, ?4, ?5)",
2628                    params![data.file_path.clone(), tr.source_symbol.clone(), tr.target_type.clone(), tr.kind.as_str(), tr.line as i64],
2629                ).await?;
2630            }
2631            pb_insert.inc(1);
2632        }
2633
2634        pb_insert.finish_and_clear();
2635
2636        self.conn.execute("COMMIT", ()).await?;
2637
2638        // Resolve import module specifiers to root-relative file paths now that all
2639        // files are indexed. Must run after COMMIT so module_to_files() can query them.
2640        self.resolve_all_imports().await.unwrap_or_else(|e| {
2641            tracing::warn!("normalize-facts: resolve_all_imports error: {}", e);
2642            0
2643        });
2644        // Follow re-export chains so imports resolve to ultimate source files.
2645        self.trace_reexports().await.unwrap_or_else(|e| {
2646            tracing::warn!("normalize-facts: trace_reexports error: {}", e);
2647            0
2648        });
2649        // Resolve call targets using the now-populated import graph.
2650        self.resolve_all_calls().await.unwrap_or_else(|e| {
2651            tracing::warn!("normalize-facts: resolve_all_calls error: {}", e);
2652            0
2653        });
2654
2655        Ok(CallGraphStats {
2656            symbols: symbol_count,
2657            calls: call_count,
2658            imports: import_count,
2659        })
2660    }
2661
2662    /// Reindex specific files: delete old data and re-extract symbols/calls/imports.
2663    /// Expects to be called inside a transaction.
2664    async fn reindex_files(
2665        &self,
2666        deleted_files: &[String],
2667        changed_files: &[String],
2668    ) -> Result<CallGraphStats, libsql::Error> {
2669        // Remove data for deleted/modified files
2670        for path in deleted_files.iter().chain(changed_files.iter()) {
2671            self.conn
2672                .execute("DELETE FROM symbols WHERE file = ?1", params![path.clone()])
2673                .await?;
2674            self.conn
2675                .execute(
2676                    "DELETE FROM calls WHERE caller_file = ?1",
2677                    params![path.clone()],
2678                )
2679                .await?;
2680            self.conn
2681                .execute("DELETE FROM imports WHERE file = ?1", params![path.clone()])
2682                .await?;
2683            self.conn
2684                .execute(
2685                    "DELETE FROM symbol_attributes WHERE file = ?1",
2686                    params![path.clone()],
2687                )
2688                .await?;
2689            self.conn
2690                .execute(
2691                    "DELETE FROM symbol_implements WHERE file = ?1",
2692                    params![path.clone()],
2693                )
2694                .await?;
2695            self.conn
2696                .execute(
2697                    "DELETE FROM type_refs WHERE file = ?1",
2698                    params![path.clone()],
2699                )
2700                .await?;
2701        }
2702
2703        let mut parser = SymbolParser::new();
2704        let mut symbol_count = 0;
2705        let mut call_count = 0;
2706        let mut import_count = 0;
2707
2708        // Parse changed files
2709        for file_path in changed_files {
2710            let full_path = self.root.join(file_path);
2711            let bytes = match std::fs::read(&full_path) {
2712                Ok(b) => b,
2713                Err(_) => continue,
2714            };
2715
2716            let grammar = support_for_path(&full_path)
2717                .map(|s| s.grammar_name().to_string())
2718                .unwrap_or_default();
2719            let hash = blake3::hash(&bytes);
2720
2721            // Try CA cache first (best-effort)
2722            let cached: Option<CachedFileData> = if !grammar.is_empty() {
2723                self.ca_cache.as_ref().and_then(|ca| {
2724                    ca.get::<CachedFileData>(hash.as_bytes(), EXTRACTOR_VERSION, &grammar)
2725                        .unwrap_or_else(|e| {
2726                            tracing::warn!("normalize-facts: CA cache get error: {}", e);
2727                            None
2728                        })
2729                })
2730            } else {
2731                None
2732            };
2733
2734            let (sym_data, call_data, imports, type_refs) = if let Some(c) = cached {
2735                (c.symbols, c.calls, c.imports, c.type_refs)
2736            } else {
2737                let content = String::from_utf8_lossy(&bytes).into_owned();
2738
2739                // parse_file returns None when the grammar .so is unavailable.
2740                // Skip the file entirely — don't index it as empty.
2741                // The missing grammar is already recorded in `parsers::report_missing_grammar`
2742                // (called from `parse_file` -> `try_get_grammar`), so callers can summarise.
2743                let symbols = match parser.parse_file(&full_path, &content) {
2744                    Some(s) => s,
2745                    None => continue,
2746                };
2747
2748                let mut sym_data = Vec::with_capacity(symbols.len());
2749                let mut call_data_local: Vec<CallEntry> = Vec::new();
2750
2751                for sym in &symbols {
2752                    sym_data.push(ParsedSymbol {
2753                        name: sym.name.clone(),
2754                        kind: sym.kind.as_str().to_string(),
2755                        start_line: sym.start_line,
2756                        end_line: sym.end_line,
2757                        parent: sym.parent.clone(),
2758                        visibility: sym.visibility.as_str().to_string(),
2759                        attributes: sym.attributes.clone(),
2760                        is_interface_impl: sym.is_interface_impl,
2761                        implements: sym.implements.clone(),
2762                        docstring: sym.docstring.clone(),
2763                    });
2764                    let kind = sym.kind.as_str();
2765                    if kind == "function" || kind == "method" {
2766                        let calls = parser.find_callees_for_symbol(&full_path, &content, sym);
2767                        for (callee_name, line, qualifier, access) in calls {
2768                            call_data_local.push((
2769                                sym.name.clone(),
2770                                callee_name,
2771                                qualifier,
2772                                access,
2773                                line,
2774                            ));
2775                        }
2776                    }
2777                }
2778
2779                let imports = parser.parse_imports(&full_path, &content);
2780                let type_refs = parser.find_type_refs(&full_path, &content);
2781
2782                // Store in CA cache (best-effort).
2783                // Grammar availability is already guaranteed above (parse_file returned Some),
2784                // so empty results here are legitimate and safe to cache.
2785                if !grammar.is_empty()
2786                    && let Some(ca) = &self.ca_cache
2787                {
2788                    let cached_store = CachedFileData {
2789                        symbols: sym_data
2790                            .iter()
2791                            .map(|s| ParsedSymbol {
2792                                name: s.name.clone(),
2793                                kind: s.kind.clone(),
2794                                start_line: s.start_line,
2795                                end_line: s.end_line,
2796                                parent: s.parent.clone(),
2797                                visibility: s.visibility.clone(),
2798                                attributes: s.attributes.clone(),
2799                                is_interface_impl: s.is_interface_impl,
2800                                implements: s.implements.clone(),
2801                                docstring: s.docstring.clone(),
2802                            })
2803                            .collect(),
2804                        calls: call_data_local.clone(),
2805                        imports: imports.clone(),
2806                        type_methods: Vec::new(), // type_methods not extracted in incremental path
2807                        type_refs: type_refs.clone(),
2808                    };
2809                    if let Err(e) =
2810                        ca.put(hash.as_bytes(), EXTRACTOR_VERSION, &grammar, &cached_store)
2811                    {
2812                        tracing::warn!("normalize-facts: CA cache put error: {}", e);
2813                    }
2814                }
2815
2816                (sym_data, call_data_local, imports, type_refs)
2817            };
2818
2819            // Insert symbols
2820            for sym in &sym_data {
2821                self.conn.execute(
2822                    "INSERT INTO symbols (file, name, kind, start_line, end_line, parent, visibility, is_impl) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
2823                    params![file_path.clone(), sym.name.clone(), sym.kind.clone(), sym.start_line as i64, sym.end_line as i64, sym.parent.clone(), sym.visibility.clone(), sym.is_interface_impl as i64],
2824                ).await?;
2825                for attr in &sym.attributes {
2826                    self.conn.execute(
2827                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
2828                        params![file_path.clone(), sym.name.clone(), attr.clone()],
2829                    ).await?;
2830                }
2831                if let Some(doc) = &sym.docstring {
2832                    self.conn.execute(
2833                        "INSERT INTO symbol_attributes (file, name, attribute) VALUES (?1, ?2, ?3)",
2834                        params![file_path.clone(), sym.name.clone(), format!("doc:{doc}")],
2835                    ).await?;
2836                }
2837                for iface in &sym.implements {
2838                    self.conn.execute(
2839                        "INSERT INTO symbol_implements (file, name, interface) VALUES (?1, ?2, ?3)",
2840                        params![file_path.clone(), sym.name.clone(), iface.clone()],
2841                    ).await?;
2842                }
2843                symbol_count += 1;
2844            }
2845
2846            // Insert calls
2847            for (caller_symbol, callee_name, qualifier, access, line) in &call_data {
2848                self.conn.execute(
2849                    "INSERT INTO calls (caller_file, caller_symbol, callee_name, callee_qualifier, access, line) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
2850                    params![file_path.clone(), caller_symbol.clone(), callee_name.clone(), qualifier.clone(), access.clone(), *line as i64],
2851                ).await?;
2852                call_count += 1;
2853            }
2854
2855            // Insert imports
2856            for imp in &imports {
2857                self.conn.execute(
2858                    "INSERT INTO imports (file, module, name, alias, line, is_reexport) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
2859                    params![file_path.clone(), imp.module.clone(), imp.name.clone(), imp.alias.clone(), imp.line as i64, imp.is_reexport as i64],
2860                ).await?;
2861                import_count += 1;
2862            }
2863
2864            // Insert type references
2865            for tr in &type_refs {
2866                self.conn.execute(
2867                    "INSERT INTO type_refs (file, source_symbol, target_type, kind, line) VALUES (?1, ?2, ?3, ?4, ?5)",
2868                    params![file_path.clone(), tr.source_symbol.clone(), tr.target_type.clone(), tr.kind.as_str(), tr.line as i64],
2869                ).await?;
2870            }
2871        }
2872
2873        Ok(CallGraphStats {
2874            symbols: symbol_count,
2875            calls: call_count,
2876            imports: import_count,
2877        })
2878    }
2879
2880    /// Incrementally update call graph for changed files only.
2881    /// Much faster than full refresh when few files changed.
2882    pub async fn incremental_call_graph_refresh(
2883        &mut self,
2884    ) -> Result<CallGraphStats, libsql::Error> {
2885        let changed = self.get_changed_files().await?;
2886
2887        // Only process supported source and data files
2888        let changed_files: Vec<String> = changed
2889            .added
2890            .into_iter()
2891            .chain(changed.modified.into_iter())
2892            .filter(|f| is_source_file(f))
2893            .collect();
2894
2895        let deleted_source_files: Vec<String> = changed
2896            .deleted
2897            .into_iter()
2898            .filter(|f| is_source_file(f))
2899            .collect();
2900
2901        if changed_files.is_empty() && deleted_source_files.is_empty() {
2902            return Ok(CallGraphStats::default());
2903        }
2904
2905        self.conn.execute("BEGIN", ()).await?;
2906        let stats = self
2907            .reindex_files(&deleted_source_files, &changed_files)
2908            .await?;
2909        self.conn.execute("COMMIT", ()).await?;
2910
2911        // Resolve any newly inserted imports to root-relative file paths.
2912        self.resolve_all_imports().await.unwrap_or_else(|e| {
2913            tracing::warn!("normalize-facts: resolve_all_imports error: {}", e);
2914            0
2915        });
2916        // Follow re-export chains so imports resolve to ultimate source files.
2917        self.trace_reexports().await.unwrap_or_else(|e| {
2918            tracing::warn!("normalize-facts: trace_reexports error: {}", e);
2919            0
2920        });
2921        // Resolve call targets using the now-populated import graph.
2922        self.resolve_all_calls().await.unwrap_or_else(|e| {
2923            tracing::warn!("normalize-facts: resolve_all_calls error: {}", e);
2924            0
2925        });
2926
2927        Ok(stats)
2928    }
2929
2930    /// Update the index for a single file (used by LSP on save).
2931    /// Skips filesystem walk — directly reindexes the given path and resolves imports/calls.
2932    pub async fn update_file(&mut self, rel_path: &str) -> Result<CallGraphStats, libsql::Error> {
2933        let full_path = self.root.join(rel_path);
2934        let exists = full_path.exists();
2935
2936        // Update the files table mtime
2937        if exists {
2938            let metadata = std::fs::metadata(&full_path).ok();
2939            let mtime = metadata
2940                .and_then(|m| m.modified().ok())
2941                .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
2942                .map(|d| d.as_secs() as i64)
2943                .unwrap_or(0);
2944            self.conn
2945                .execute(
2946                    "UPDATE files SET mtime = ?1 WHERE path = ?2",
2947                    params![mtime, rel_path.to_string()],
2948                )
2949                .await?;
2950        }
2951
2952        if !is_source_file(rel_path) {
2953            return Ok(CallGraphStats::default());
2954        }
2955
2956        self.conn.execute("BEGIN", ()).await?;
2957        let stats = if exists {
2958            self.reindex_files(&[], &[rel_path.to_string()]).await?
2959        } else {
2960            self.reindex_files(&[rel_path.to_string()], &[]).await?
2961        };
2962        self.conn.execute("COMMIT", ()).await?;
2963
2964        self.resolve_all_imports().await.unwrap_or_else(|e| {
2965            tracing::warn!("normalize-facts: resolve_all_imports error: {}", e);
2966            0
2967        });
2968        self.trace_reexports().await.unwrap_or_else(|e| {
2969            tracing::warn!("normalize-facts: trace_reexports error: {}", e);
2970            0
2971        });
2972        self.resolve_all_calls().await.unwrap_or_else(|e| {
2973            tracing::warn!("normalize-facts: resolve_all_calls error: {}", e);
2974            0
2975        });
2976
2977        Ok(stats)
2978    }
2979
2980    /// Check if call graph needs refresh
2981    #[allow(dead_code)] // FileIndex API - used by daemon
2982    pub async fn needs_call_graph_refresh(&self) -> bool {
2983        self.call_graph_stats().await.unwrap_or_default().symbols == 0
2984    }
2985
2986    /// Find files matching a query using LIKE (fast pre-filter)
2987    /// Splits query by whitespace/separators and requires all parts to match
2988    /// Special case: queries starting with '.' are treated as extension patterns
2989    pub async fn find_like(&self, query: &str) -> Result<Vec<IndexedFile>, libsql::Error> {
2990        // Handle extension patterns (e.g., ".rs", ".py")
2991        if query.starts_with('.') && !query.contains('/') {
2992            let pattern = format!("%{}", query.to_lowercase());
2993            let mut rows = self.conn.query(
2994                "SELECT path, is_dir, mtime, lines FROM files WHERE LOWER(path) LIKE ?1 LIMIT 1000",
2995                params![pattern],
2996            ).await?;
2997            let mut files = Vec::new();
2998            while let Some(row) = rows.next().await? {
2999                files.push(IndexedFile {
3000                    path: row.get(0)?,
3001                    is_dir: row.get::<i64>(1)? != 0,
3002                    mtime: row.get(2)?,
3003                    lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
3004                });
3005            }
3006            return Ok(files);
3007        }
3008
3009        // Normalize query: split on whitespace and common separators (but not '.')
3010        let parts: Vec<&str> = query
3011            .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
3012            .filter(|s| !s.is_empty())
3013            .collect();
3014
3015        if parts.is_empty() {
3016            return Ok(Vec::new());
3017        }
3018
3019        // Cap to 4 parts before building SQL so ?1..?N matches the bound params count.
3020        let parts: Vec<&str> = parts.into_iter().take(4).collect();
3021
3022        // Build WHERE clause: LOWER(path) LIKE '%part1%' AND LOWER(path) LIKE '%part2%' ...
3023        let conditions: Vec<String> = (0..parts.len())
3024            .map(|i| format!("LOWER(path) LIKE ?{}", i + 1))
3025            .collect();
3026        let sql = format!(
3027            "SELECT path, is_dir, mtime, lines FROM files WHERE {} LIMIT 50",
3028            conditions.join(" AND ")
3029        );
3030
3031        let patterns: Vec<String> = parts
3032            .iter()
3033            .map(|p| format!("%{}%", p.to_lowercase()))
3034            .collect();
3035
3036        // For dynamic params, we need to build them differently
3037        // libsql doesn't support dynamic parameter slices the same way
3038        // Use a simpler approach for up to common cases
3039        let mut files = Vec::new();
3040        let mut rows = match patterns.len() {
3041            1 => self.conn.query(&sql, params![patterns[0].clone()]).await?,
3042            2 => {
3043                self.conn
3044                    .query(&sql, params![patterns[0].clone(), patterns[1].clone()])
3045                    .await?
3046            }
3047            3 => {
3048                self.conn
3049                    .query(
3050                        &sql,
3051                        params![
3052                            patterns[0].clone(),
3053                            patterns[1].clone(),
3054                            patterns[2].clone()
3055                        ],
3056                    )
3057                    .await?
3058            }
3059            4 => {
3060                self.conn
3061                    .query(
3062                        &sql,
3063                        params![
3064                            patterns[0].clone(),
3065                            patterns[1].clone(),
3066                            patterns[2].clone(),
3067                            patterns[3].clone()
3068                        ],
3069                    )
3070                    .await?
3071            }
3072            // parts is capped to 4 above, so len > 4 is unreachable
3073            _ => unreachable!("parts capped to 4"),
3074        };
3075
3076        while let Some(row) = rows.next().await? {
3077            files.push(IndexedFile {
3078                path: row.get(0)?,
3079                is_dir: row.get::<i64>(1)? != 0,
3080                mtime: row.get(2)?,
3081                lines: u64::try_from(row.get::<i64>(3)?).unwrap_or(0) as usize,
3082            });
3083        }
3084        Ok(files)
3085    }
3086
3087    /// Rebuild (or incrementally update) the co-change edges table from git history.
3088    ///
3089    /// When `since_commit` is `None`, performs a full rebuild: clears the table and walks
3090    /// all commits. When `since_commit` is `Some(sha)`, walks only commits after that SHA
3091    /// and merges counts into the existing table before re-applying the per-file fanout cap.
3092    ///
3093    /// Algorithm:
3094    /// 1. Walk commits via gix (pure-Rust, no `git` binary required).
3095    /// 2. For each commit: skip if it touches >50 files (large mechanical commit, no signal).
3096    /// 3. For each pair of source files in a commit: increment co-change count.
3097    /// 4. Apply filters: drop pairs with count < 2, cap each file to top 20 partners.
3098    /// 5. Upsert into `co_change_edges`.
3099    /// 6. Record HEAD SHA in `meta.co_change_last_commit` for incremental use.
3100    pub async fn rebuild_co_change_edges(
3101        &self,
3102        since_commit: Option<&str>,
3103    ) -> Result<usize, libsql::Error> {
3104        use std::collections::HashMap;
3105
3106        let root = &self.root;
3107
3108        // Open gix repository. If not a git repo, silently skip (not an error).
3109        let repo = match open_gix_repo(root) {
3110            Some(r) => r,
3111            None => {
3112                tracing::debug!("co-change: no git repository found at {:?}, skipping", root);
3113                return Ok(0);
3114            }
3115        };
3116
3117        let head_sha = match repo.head_id() {
3118            Ok(id) => id.to_string(),
3119            Err(_) => return Ok(0),
3120        };
3121
3122        // Walk commits, collecting per-commit file lists.
3123        let commit_files = walk_commits_for_co_change(&repo, since_commit);
3124
3125        if commit_files.is_empty() && since_commit.is_none() {
3126            // No history (or empty repo): ensure table is cleared and metadata stored.
3127            self.conn.execute("DELETE FROM co_change_edges", ()).await?;
3128            self.conn
3129                .execute(
3130                    "INSERT OR REPLACE INTO meta (key, value) VALUES ('co_change_last_commit', ?1)",
3131                    params![head_sha],
3132                )
3133                .await?;
3134            return Ok(0);
3135        }
3136
3137        // For incremental: load existing counts from DB, merge new counts, re-apply cap.
3138        // For full: start fresh.
3139        let mut pair_counts: HashMap<(String, String), usize> = HashMap::new();
3140
3141        if since_commit.is_some() {
3142            // Load existing edges into the map so we can merge.
3143            let mut rows = self
3144                .conn
3145                .query("SELECT file_a, file_b, count FROM co_change_edges", ())
3146                .await?;
3147            while let Some(row) = rows.next().await? {
3148                let a: String = row.get(0)?;
3149                let b: String = row.get(1)?;
3150                let c: i64 = row.get(2)?;
3151                pair_counts.insert((a, b), c as usize);
3152            }
3153        }
3154
3155        // Accumulate new commit data.
3156        for files in &commit_files {
3157            // Files are already filtered to source files only.
3158            if files.len() > 50 || files.len() < 2 {
3159                continue;
3160            }
3161            let mut sorted = files.clone();
3162            sorted.sort_unstable();
3163            sorted.dedup();
3164            for i in 0..sorted.len() {
3165                for j in (i + 1)..sorted.len() {
3166                    let key = (sorted[i].clone(), sorted[j].clone());
3167                    *pair_counts.entry(key).or_default() += 1;
3168                }
3169            }
3170        }
3171
3172        // Apply filters: drop count < 2, apply per-file top-20 fanout cap.
3173        pair_counts.retain(|_, v| *v >= 2);
3174        let pair_counts = apply_fanout_cap(pair_counts, 20);
3175
3176        // Write to DB.
3177        if since_commit.is_some() {
3178            // Full replace: clear and reinsert (we have the full merged set).
3179            self.conn.execute("DELETE FROM co_change_edges", ()).await?;
3180        } else {
3181            self.conn.execute("DELETE FROM co_change_edges", ()).await?;
3182        }
3183
3184        let mut inserted = 0usize;
3185        for ((a, b), count) in &pair_counts {
3186            self.conn.execute(
3187                "INSERT OR REPLACE INTO co_change_edges (file_a, file_b, count) VALUES (?1, ?2, ?3)",
3188                params![a.clone(), b.clone(), *count as i64],
3189            ).await?;
3190            inserted += 1;
3191        }
3192
3193        // Record the HEAD SHA so the next incremental run knows where to resume.
3194        self.conn
3195            .execute(
3196                "INSERT OR REPLACE INTO meta (key, value) VALUES ('co_change_last_commit', ?1)",
3197                params![head_sha],
3198            )
3199            .await?;
3200
3201        Ok(inserted)
3202    }
3203
3204    /// Query co-change edges from the index.
3205    ///
3206    /// Returns pairs `(file_a, file_b, count)` where count >= `min_count`.
3207    /// Returns `Ok(None)` if the `co_change_edges` table is empty (not yet built),
3208    /// so callers can fall back to the git walk.
3209    pub async fn query_co_change_edges(
3210        &self,
3211        min_count: usize,
3212    ) -> Result<Option<Vec<(String, String, usize)>>, libsql::Error> {
3213        // Check if the table has any data.
3214        let mut check = self
3215            .conn
3216            .query("SELECT COUNT(*) FROM co_change_edges", ())
3217            .await?;
3218        let total: i64 = if let Some(row) = check.next().await? {
3219            row.get(0)?
3220        } else {
3221            0
3222        };
3223        if total == 0 {
3224            return Ok(None);
3225        }
3226
3227        let mut rows = self
3228            .conn
3229            .query(
3230                "SELECT file_a, file_b, count FROM co_change_edges WHERE count >= ?1",
3231                params![min_count as i64],
3232            )
3233            .await?;
3234
3235        let mut result = Vec::new();
3236        while let Some(row) = rows.next().await? {
3237            let a: String = row.get(0)?;
3238            let b: String = row.get(1)?;
3239            let c: i64 = row.get(2)?;
3240            result.push((a, b, c as usize));
3241        }
3242        Ok(Some(result))
3243    }
3244
3245    /// Return the stored HEAD SHA from the last co-change rebuild, if any.
3246    pub async fn co_change_last_commit(&self) -> Option<String> {
3247        let mut rows = self
3248            .conn
3249            .query(
3250                "SELECT value FROM meta WHERE key = 'co_change_last_commit'",
3251                (),
3252            )
3253            .await
3254            .ok()?;
3255        let row = rows.next().await.ok()??;
3256        row.get(0).ok()
3257    }
3258
3259    // -------------------------------------------------------------------------
3260    // Diagnostics cache (daemon use only)
3261    // -------------------------------------------------------------------------
3262
3263    /// Persist rkyv-serialized diagnostics blob for one engine ("syntax", "fact", "native", "all").
3264    /// Replaces any previous value for that engine.
3265    ///
3266    /// `config_hash` is stamped on the row so callers can detect blobs produced
3267    /// under a different config (cross-daemon-restart staleness). See
3268    /// `load_diagnostics_blob` for the matching read side.
3269    pub async fn save_diagnostics_blob(
3270        &self,
3271        engine: &str,
3272        blob: &[u8],
3273        config_hash: &str,
3274    ) -> Result<(), libsql::Error> {
3275        let now = std::time::SystemTime::now()
3276            .duration_since(std::time::UNIX_EPOCH)
3277            .unwrap_or_default()
3278            .as_secs() as i64;
3279        self.conn
3280            .execute(
3281                "INSERT OR REPLACE INTO daemon_diagnostics (engine, issues_blob, config_hash, updated_at)
3282                 VALUES (?1, ?2, ?3, ?4)",
3283                params![engine.to_string(), blob.to_vec(), config_hash.to_string(), now],
3284            )
3285            .await?;
3286        Ok(())
3287    }
3288
3289    /// Load rkyv-serialized diagnostics blob for one engine.
3290    ///
3291    /// Returns `None` if no row exists *or* the row's `config_hash` does not
3292    /// match `expected_hash`. The mismatch case is treated as a cache miss so
3293    /// the caller will reprime under the current config rather than serving a
3294    /// blob from a previous daemon session.
3295    pub async fn load_diagnostics_blob(
3296        &self,
3297        engine: &str,
3298        expected_hash: &str,
3299    ) -> Result<Option<Vec<u8>>, libsql::Error> {
3300        let mut rows = self
3301            .conn
3302            .query(
3303                "SELECT issues_blob, config_hash FROM daemon_diagnostics WHERE engine = ?1",
3304                params![engine.to_string()],
3305            )
3306            .await?;
3307        if let Some(row) = rows.next().await? {
3308            let blob: Vec<u8> = row.get(0)?;
3309            let stored_hash: String = row.get(1)?;
3310            if stored_hash == expected_hash {
3311                Ok(Some(blob))
3312            } else {
3313                Ok(None)
3314            }
3315        } else {
3316            Ok(None)
3317        }
3318    }
3319
3320    /// Replace per-file diagnostics blobs in a single transaction.
3321    ///
3322    /// `upserts`: `(relative_path, rkyv_blob)` — files that have issues.
3323    /// `deletes`: relative paths that became clean (had a row, now don't).
3324    ///
3325    /// All upserts and deletes commit atomically so readers never see a
3326    /// partially-updated state.
3327    pub async fn save_diagnostics_per_file(
3328        &self,
3329        upserts: &[(String, Vec<u8>)],
3330        deletes: &[String],
3331        config_hash: &str,
3332    ) -> Result<(), libsql::Error> {
3333        let now = std::time::SystemTime::now()
3334            .duration_since(std::time::UNIX_EPOCH)
3335            .unwrap_or_default()
3336            .as_secs() as i64;
3337        self.conn.execute("BEGIN", ()).await?;
3338        let result: Result<(), libsql::Error> = async {
3339            for (path, blob) in upserts {
3340                self.conn
3341                    .execute(
3342                        "INSERT OR REPLACE INTO daemon_diagnostics_per_file
3343                         (path, issues_blob, config_hash, updated_at) VALUES (?1, ?2, ?3, ?4)",
3344                        params![path.clone(), blob.clone(), config_hash.to_string(), now],
3345                    )
3346                    .await?;
3347            }
3348            for path in deletes {
3349                self.conn
3350                    .execute(
3351                        "DELETE FROM daemon_diagnostics_per_file WHERE path = ?1",
3352                        params![path.clone()],
3353                    )
3354                    .await?;
3355            }
3356            Ok(())
3357        }
3358        .await;
3359        match result {
3360            Ok(()) => {
3361                self.conn.execute("COMMIT", ()).await?;
3362                Ok(())
3363            }
3364            Err(e) => {
3365                let _ = self.conn.execute("ROLLBACK", ()).await;
3366                Err(e)
3367            }
3368        }
3369    }
3370
3371    /// Load the rkyv blob for one file. `None` = no row (file is clean) or the
3372    /// row's `config_hash` doesn't match `expected_hash` (stale across config
3373    /// change).
3374    pub async fn load_diagnostics_for_file(
3375        &self,
3376        path: &str,
3377        expected_hash: &str,
3378    ) -> Result<Option<Vec<u8>>, libsql::Error> {
3379        let mut rows = self
3380            .conn
3381            .query(
3382                "SELECT issues_blob, config_hash FROM daemon_diagnostics_per_file WHERE path = ?1",
3383                params![path.to_string()],
3384            )
3385            .await?;
3386        if let Some(row) = rows.next().await? {
3387            let blob: Vec<u8> = row.get(0)?;
3388            let stored_hash: String = row.get(1)?;
3389            if stored_hash == expected_hash {
3390                Ok(Some(blob))
3391            } else {
3392                Ok(None)
3393            }
3394        } else {
3395            Ok(None)
3396        }
3397    }
3398
3399    /// Load blobs for many files. Skips files with no row or whose stored
3400    /// `config_hash` doesn't match `expected_hash`.
3401    /// Returns `(path, blob)` pairs in arbitrary order.
3402    pub async fn load_diagnostics_for_files(
3403        &self,
3404        paths: &[String],
3405        expected_hash: &str,
3406    ) -> Result<Vec<(String, Vec<u8>)>, libsql::Error> {
3407        let mut out = Vec::new();
3408        for path in paths {
3409            let mut rows = self
3410                .conn
3411                .query(
3412                    "SELECT path, issues_blob, config_hash FROM daemon_diagnostics_per_file WHERE path = ?1",
3413                    params![path.clone()],
3414                )
3415                .await?;
3416            if let Some(row) = rows.next().await? {
3417                let p: String = row.get(0)?;
3418                let b: Vec<u8> = row.get(1)?;
3419                let stored_hash: String = row.get(2)?;
3420                if stored_hash == expected_hash {
3421                    out.push((p, b));
3422                }
3423            }
3424        }
3425        Ok(out)
3426    }
3427
3428    /// Drop every cached diagnostic row (both per-engine blobs and the
3429    /// per-file table). Used by the daemon when `.normalize/config.toml` or a
3430    /// rule-definition file changes — the cached blobs reflect the *previous*
3431    /// config, so they must be cleared before a full reprime to prevent stale
3432    /// `RunRules` results being served between the config change and the
3433    /// reprime completing.
3434    pub async fn clear_all_diagnostics(&self) -> Result<(), libsql::Error> {
3435        self.conn
3436            .execute("DELETE FROM daemon_diagnostics", ())
3437            .await?;
3438        self.conn
3439            .execute("DELETE FROM daemon_diagnostics_per_file", ())
3440            .await?;
3441        Ok(())
3442    }
3443
3444    /// Return all paths that currently have a per-file diagnostics row.
3445    /// Used by the daemon refresh diff to detect files that became clean.
3446    pub async fn list_diagnostic_paths(&self) -> Result<Vec<String>, libsql::Error> {
3447        let mut rows = self
3448            .conn
3449            .query("SELECT path FROM daemon_diagnostics_per_file", ())
3450            .await?;
3451        let mut out = Vec::new();
3452        while let Some(row) = rows.next().await? {
3453            out.push(row.get(0)?);
3454        }
3455        Ok(out)
3456    }
3457}
3458
3459// =============================================================================
3460// Co-change helpers (not on FileIndex — free functions to keep impl clean)
3461// =============================================================================
3462
3463/// Open a gix repository at or containing `root`.
3464fn open_gix_repo(root: &std::path::Path) -> Option<gix::Repository> {
3465    gix::discover(root)
3466        .ok()
3467        .map(|r| r.into_sync().to_thread_local())
3468}
3469
3470/// Walk commits via gix, returning per-commit lists of *source* files changed.
3471///
3472/// If `since_commit` is `Some(sha)`, only commits after (exclusive) that SHA are returned.
3473/// Commits are yielded oldest-first from the HEAD ancestry.
3474fn walk_commits_for_co_change(
3475    repo: &gix::Repository,
3476    since_commit: Option<&str>,
3477) -> Vec<Vec<String>> {
3478    let head_id = match repo.head_id() {
3479        Ok(id) => id,
3480        Err(_) => return Vec::new(),
3481    };
3482    let walk = match head_id.ancestors().all() {
3483        Ok(w) => w,
3484        Err(_) => return Vec::new(),
3485    };
3486
3487    // If since_commit is specified, resolve it to an ObjectId for fast comparison.
3488    let stop_id: Option<gix::hash::ObjectId> = since_commit.and_then(|sha| sha.parse().ok());
3489
3490    let mut result = Vec::new();
3491
3492    for info in walk {
3493        let Ok(info) = info else { continue };
3494        let commit_id = info.id();
3495
3496        // Stop when we hit the commit we already processed.
3497        if let Some(ref stop) = stop_id
3498            && commit_id == *stop
3499        {
3500            break;
3501        }
3502
3503        let Ok(commit) = info.object() else { continue };
3504        let Ok(tree) = commit.tree() else { continue };
3505
3506        let parent_tree = info
3507            .parent_ids()
3508            .next()
3509            .and_then(|pid| pid.object().ok())
3510            .and_then(|obj| obj.into_commit().tree().ok());
3511
3512        let changes = match repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None) {
3513            Ok(c) => c,
3514            Err(_) => continue,
3515        };
3516
3517        let files: Vec<String> = changes
3518            .into_iter()
3519            .filter_map(|change| {
3520                use gix::object::tree::diff::ChangeDetached;
3521                let location = match change {
3522                    ChangeDetached::Addition { location, .. } => location,
3523                    ChangeDetached::Deletion { location, .. } => location,
3524                    ChangeDetached::Modification { location, .. } => location,
3525                    ChangeDetached::Rewrite {
3526                        source_location, ..
3527                    } => source_location,
3528                };
3529                let path_str = String::from_utf8_lossy(&location).into_owned();
3530                // Only include source files (those with a supported language extension).
3531                if is_source_file(&path_str) {
3532                    Some(path_str)
3533                } else {
3534                    None
3535                }
3536            })
3537            .collect();
3538
3539        if files.len() >= 2 {
3540            result.push(files);
3541        }
3542    }
3543
3544    result
3545}
3546
/// Apply a per-file fanout cap: every file nominates its top `cap` partners by count,
/// and an edge survives when at least one of its two endpoints nominated it.
///
/// Because one nomination is enough, a file that many other files rank highly can
/// still end up with more than `cap` edges; the cap bounds what each file *selects*,
/// not what it receives.
///
/// Partners with equal counts are ranked by path (ascending), so the result is the
/// same on every run regardless of `HashMap` iteration order.
///
/// Keys are expected in canonical form (lexicographically smaller path first);
/// entries whose key is not in that form are dropped, because nominations are
/// recorded under the canonical key only.
///
/// Returns a new map containing the surviving entries with their counts unchanged.
fn apply_fanout_cap(
    pair_counts: std::collections::HashMap<(String, String), usize>,
    cap: usize,
) -> std::collections::HashMap<(String, String), usize> {
    use std::collections::{HashMap, HashSet};

    // Build per-file partner lists; each edge is listed under both of its endpoints.
    let mut file_partners: HashMap<String, Vec<(String, usize)>> = HashMap::new();
    for ((a, b), count) in &pair_counts {
        file_partners
            .entry(a.clone())
            .or_default()
            .push((b.clone(), *count));
        file_partners
            .entry(b.clone())
            .or_default()
            .push((a.clone(), *count));
    }

    // For each file, keep only the top `cap` partners.
    let mut allowed: HashSet<(String, String)> = HashSet::new();
    for (file, mut partners) in file_partners {
        // Highest count first; ties broken by partner path so truncation does not
        // depend on the order in which the map happened to yield its entries.
        partners.sort_unstable_by(|x, y| y.1.cmp(&x.1).then_with(|| x.0.cmp(&y.0)));
        partners.truncate(cap);
        for (partner, _) in partners {
            // Canonical key: lexicographically smaller goes first.
            let key = if file <= partner {
                (file.clone(), partner)
            } else {
                (partner, file.clone())
            };
            allowed.insert(key);
        }
    }

    pair_counts
        .into_iter()
        .filter(|(key, _)| allowed.contains(key))
        .collect()
}
3590
3591#[cfg(test)]
3592mod tests {
3593    use super::*;
3594    use std::fs;
3595    use tempfile::tempdir;
3596
3597    #[tokio::test]
3598    async fn test_index_creation() {
3599        let dir = tempdir().unwrap();
3600        fs::create_dir_all(dir.path().join("src/myapp")).unwrap();
3601        fs::write(dir.path().join("src/myapp/cli.py"), "").unwrap();
3602        fs::write(dir.path().join("src/myapp/dwim.py"), "").unwrap();
3603
3604        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
3605            .await
3606            .unwrap();
3607        assert!(index.needs_refresh().await);
3608
3609        let count = index.refresh().await.unwrap();
3610        assert!(count >= 2);
3611
3612        // Should find files by name
3613        let matches = index.find_by_name("cli.py").await.unwrap();
3614        assert_eq!(matches.len(), 1);
3615        assert!(matches[0].path.ends_with("cli.py"));
3616    }
3617
3618    #[tokio::test]
3619    async fn test_find_by_stem() {
3620        let dir = tempdir().unwrap();
3621        fs::create_dir_all(dir.path().join("src")).unwrap();
3622        fs::write(dir.path().join("src/test.py"), "").unwrap();
3623        fs::write(dir.path().join("src/test.rs"), "").unwrap();
3624
3625        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
3626            .await
3627            .unwrap();
3628        index.refresh().await.unwrap();
3629
3630        let matches = index.find_by_stem("test").await.unwrap();
3631        assert_eq!(matches.len(), 2);
3632    }
3633
3634    #[tokio::test]
3635    async fn test_wildcard_import_resolution() {
3636        let dir = tempdir().unwrap();
3637        fs::create_dir_all(dir.path().join("src/mylib")).unwrap();
3638        // Module that exports MyClass
3639        fs::write(
3640            dir.path().join("src/mylib/exports.py"),
3641            "class MyClass: pass",
3642        )
3643        .unwrap();
3644        // Module that exports OtherThing
3645        fs::write(
3646            dir.path().join("src/mylib/other.py"),
3647            "def OtherThing(): pass",
3648        )
3649        .unwrap();
3650        // Consumer with wildcard imports
3651        fs::write(
3652            dir.path().join("src/consumer.py"),
3653            "from mylib.exports import *\nfrom mylib.other import *\nMyClass()",
3654        )
3655        .unwrap();
3656
3657        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
3658            .await
3659            .unwrap();
3660        index.refresh().await.unwrap();
3661        index.refresh_call_graph().await.unwrap();
3662
3663        // Now resolve MyClass - should find it in mylib.exports
3664        let result = index
3665            .resolve_import("src/consumer.py", "MyClass")
3666            .await
3667            .unwrap();
3668        assert!(result.is_some(), "Should resolve MyClass");
3669        let (module, name) = result.unwrap();
3670        assert_eq!(module, "mylib.exports");
3671        assert_eq!(name, "MyClass");
3672
3673        // Resolve OtherThing - should find it in mylib.other
3674        let result = index
3675            .resolve_import("src/consumer.py", "OtherThing")
3676            .await
3677            .unwrap();
3678        assert!(result.is_some(), "Should resolve OtherThing");
3679        let (module, name) = result.unwrap();
3680        assert_eq!(module, "mylib.other");
3681        assert_eq!(name, "OtherThing");
3682    }
3683
3684    #[tokio::test]
3685    async fn test_method_call_resolution() {
3686        let dir = tempdir().unwrap();
3687        fs::create_dir_all(dir.path().join("src")).unwrap();
3688        // A class with methods that call each other
3689        let class_code = r#"
3690class MyClass:
3691    def method_a(self):
3692        self.method_b()
3693
3694    def method_b(self):
3695        pass
3696
3697    def method_c(self):
3698        self.method_b()
3699"#;
3700        fs::write(dir.path().join("src/myclass.py"), class_code).unwrap();
3701
3702        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
3703            .await
3704            .unwrap();
3705        index.refresh().await.unwrap();
3706        index.refresh_call_graph().await.unwrap();
3707
3708        // Find callers of method_b - should include method_a and method_c
3709        let callers = index
3710            .find_callers("method_b", "src/myclass.py")
3711            .await
3712            .unwrap();
3713        assert!(!callers.is_empty(), "Should find callers of method_b");
3714
3715        let caller_names: Vec<&str> = callers
3716            .iter()
3717            .map(|(_, name, _, _)| name.as_str())
3718            .collect();
3719        assert!(
3720            caller_names.contains(&"method_a"),
3721            "method_a should call method_b"
3722        );
3723        assert!(
3724            caller_names.contains(&"method_c"),
3725            "method_c should call method_b"
3726        );
3727
3728        // Find callers of MyClass.method_b - more specific
3729        let callers = index
3730            .find_callers("MyClass.method_b", "src/myclass.py")
3731            .await
3732            .unwrap();
3733        assert!(
3734            !callers.is_empty(),
3735            "Should find callers of MyClass.method_b"
3736        );
3737    }
3738
3739    /// Regression test: find_callers must not return callers of a same-named function
3740    /// in a different module. Two modules define `helper()`, and `main.py` imports only
3741    /// one of them. `find_callers("helper", "src/utils_a.py")` must not include calls
3742    /// that target `src/utils_b.py`'s `helper()`.
3743    #[tokio::test]
3744    async fn test_find_callers_cross_module_disambiguation() {
3745        let dir = tempdir().unwrap();
3746        fs::create_dir_all(dir.path().join("src")).unwrap();
3747
3748        // Two modules with the same function name
3749        fs::write(
3750            dir.path().join("src/utils_a.py"),
3751            "def helper():\n    return 'A'\n",
3752        )
3753        .unwrap();
3754        fs::write(
3755            dir.path().join("src/utils_b.py"),
3756            "def helper():\n    return 'B'\n",
3757        )
3758        .unwrap();
3759
3760        // caller_a.py imports from utils_a and calls helper()
3761        fs::write(
3762            dir.path().join("src/caller_a.py"),
3763            "from utils_a import helper\n\ndef do_a():\n    helper()\n",
3764        )
3765        .unwrap();
3766
3767        // caller_b.py imports from utils_b and calls helper()
3768        fs::write(
3769            dir.path().join("src/caller_b.py"),
3770            "from utils_b import helper\n\ndef do_b():\n    helper()\n",
3771        )
3772        .unwrap();
3773
3774        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
3775            .await
3776            .unwrap();
3777        index.refresh().await.unwrap();
3778        index.refresh_call_graph().await.unwrap();
3779
3780        // Check whether imports got resolved (depends on normalize-local-deps Python support)
3781        let mut rows = index
3782            .connection()
3783            .query(
3784                "SELECT file, resolved_file FROM imports WHERE name = 'helper' ORDER BY file",
3785                (),
3786            )
3787            .await
3788            .unwrap();
3789        let mut import_resolution: Vec<(String, Option<String>)> = Vec::new();
3790        while let Some(row) = rows.next().await.unwrap() {
3791            import_resolution.push((row.get(0).unwrap(), row.get(1).unwrap()));
3792        }
3793
3794        // Check whether calls got resolved
3795        let mut rows = index
3796            .connection()
3797            .query(
3798                "SELECT caller_file, callee_name, callee_resolved_file FROM calls WHERE callee_name = 'helper' ORDER BY caller_file",
3799                (),
3800            )
3801            .await
3802            .unwrap();
3803        let mut call_resolution: Vec<(String, String, Option<String>)> = Vec::new();
3804        while let Some(row) = rows.next().await.unwrap() {
3805            call_resolution.push((
3806                row.get(0).unwrap(),
3807                row.get(1).unwrap(),
3808                row.get(2).unwrap(),
3809            ));
3810        }
3811
3812        // Ask for callers of utils_a's helper
3813        let callers = index
3814            .find_callers("helper", "src/utils_a.py")
3815            .await
3816            .unwrap();
3817        let caller_files: Vec<&str> = callers.iter().map(|(f, _, _, _)| f.as_str()).collect();
3818
3819        // When imports are resolved, disambiguation is precise — only the correct
3820        // caller appears. When unresolved (no LocalDeps for test setup), both
3821        // callers may appear via the NULL fallback. Either way caller_a must appear.
3822        assert!(
3823            caller_files.contains(&"src/caller_a.py"),
3824            "caller_a.py calls helper() (imports utils_a), must be a caller. Got: {:?}\nimports: {:?}\ncalls: {:?}",
3825            caller_files,
3826            import_resolution,
3827            call_resolution,
3828        );
3829
3830        let imports_resolved = import_resolution
3831            .iter()
3832            .any(|(_, r)| r.as_deref() == Some("src/utils_a.py"));
3833        if imports_resolved {
3834            assert!(
3835                !caller_files.contains(&"src/caller_b.py"),
3836                "caller_b.py imports utils_b, should NOT be a caller of utils_a::helper. Got: {:?}",
3837                caller_files
3838            );
3839        }
3840
3841        // Ask for callers of utils_b's helper
3842        let callers = index
3843            .find_callers("helper", "src/utils_b.py")
3844            .await
3845            .unwrap();
3846        let caller_files: Vec<&str> = callers.iter().map(|(f, _, _, _)| f.as_str()).collect();
3847        assert!(
3848            caller_files.contains(&"src/caller_b.py"),
3849            "caller_b.py calls helper() (imports utils_b), must be a caller. Got: {:?}\nimports: {:?}\ncalls: {:?}",
3850            caller_files,
3851            import_resolution,
3852            call_resolution,
3853        );
3854        if imports_resolved {
3855            assert!(
3856                !caller_files.contains(&"src/caller_a.py"),
3857                "caller_a.py imports utils_a, should NOT be a caller of utils_b::helper. Got: {:?}",
3858                caller_files
3859            );
3860        }
3861    }
3862
3863    // =====================================================================
3864    // Per-file diagnostics storage tests
3865    // =====================================================================
3866
3867    /// Build a FileIndex on an empty tempdir for diagnostics-table tests.
3868    async fn empty_index(dir: &std::path::Path) -> FileIndex {
3869        FileIndex::open(&dir.join("index.sqlite"), dir)
3870            .await
3871            .unwrap()
3872    }
3873
3874    #[tokio::test]
3875    async fn per_file_save_upsert_and_delete_roundtrip() {
3876        let dir = tempdir().unwrap();
3877        let index = empty_index(dir.path()).await;
3878
3879        let upserts = vec![
3880            ("a.rs".to_string(), vec![1u8, 2, 3]),
3881            ("b.rs".to_string(), vec![4, 5, 6]),
3882        ];
3883        index
3884            .save_diagnostics_per_file(&upserts, &[], "h1")
3885            .await
3886            .unwrap();
3887
3888        let a = index.load_diagnostics_for_file("a.rs", "h1").await.unwrap();
3889        let b = index.load_diagnostics_for_file("b.rs", "h1").await.unwrap();
3890        assert_eq!(a, Some(vec![1, 2, 3]));
3891        assert_eq!(b, Some(vec![4, 5, 6]));
3892
3893        // Now delete a.rs and update b.rs in the same call.
3894        let upserts2 = vec![("b.rs".to_string(), vec![9, 9])];
3895        let deletes2 = vec!["a.rs".to_string()];
3896        index
3897            .save_diagnostics_per_file(&upserts2, &deletes2, "h1")
3898            .await
3899            .unwrap();
3900
3901        assert_eq!(
3902            index.load_diagnostics_for_file("a.rs", "h1").await.unwrap(),
3903            None
3904        );
3905        assert_eq!(
3906            index.load_diagnostics_for_file("b.rs", "h1").await.unwrap(),
3907            Some(vec![9, 9])
3908        );
3909    }
3910
3911    #[tokio::test]
3912    async fn per_file_save_empty_inputs_is_noop() {
3913        let dir = tempdir().unwrap();
3914        let index = empty_index(dir.path()).await;
3915        // No-op call should succeed and leave the table empty.
3916        index
3917            .save_diagnostics_per_file(&[], &[], "h")
3918            .await
3919            .unwrap();
3920        assert!(index.list_diagnostic_paths().await.unwrap().is_empty());
3921    }
3922
3923    #[tokio::test]
3924    async fn load_diagnostics_for_file_missing_returns_none() {
3925        let dir = tempdir().unwrap();
3926        let index = empty_index(dir.path()).await;
3927        assert_eq!(
3928            index
3929                .load_diagnostics_for_file("nope.rs", "h")
3930                .await
3931                .unwrap(),
3932            None
3933        );
3934    }
3935
3936    /// A row written under one config_hash must be invisible to a load that
3937    /// presents a different hash — this is what makes the cache safe across
3938    /// daemon restarts after a config edit.
3939    #[tokio::test]
3940    async fn per_file_config_hash_mismatch_is_cache_miss() {
3941        let dir = tempdir().unwrap();
3942        let index = empty_index(dir.path()).await;
3943        index
3944            .save_diagnostics_per_file(&[("a.rs".to_string(), vec![1])], &[], "old")
3945            .await
3946            .unwrap();
3947        // Same hash → hit.
3948        assert_eq!(
3949            index
3950                .load_diagnostics_for_file("a.rs", "old")
3951                .await
3952                .unwrap(),
3953            Some(vec![1])
3954        );
3955        // Different hash → miss.
3956        assert_eq!(
3957            index
3958                .load_diagnostics_for_file("a.rs", "new")
3959                .await
3960                .unwrap(),
3961            None
3962        );
3963        let multi = index
3964            .load_diagnostics_for_files(&["a.rs".to_string()], "new")
3965            .await
3966            .unwrap();
3967        assert!(multi.is_empty());
3968    }
3969
3970    /// Same invariant for the per-engine `daemon_diagnostics` table.
3971    #[tokio::test]
3972    async fn engine_blob_config_hash_mismatch_is_cache_miss() {
3973        let dir = tempdir().unwrap();
3974        let index = empty_index(dir.path()).await;
3975        index
3976            .save_diagnostics_blob("syntax", &[7, 8, 9], "old")
3977            .await
3978            .unwrap();
3979        assert_eq!(
3980            index.load_diagnostics_blob("syntax", "old").await.unwrap(),
3981            Some(vec![7, 8, 9])
3982        );
3983        assert_eq!(
3984            index.load_diagnostics_blob("syntax", "new").await.unwrap(),
3985            None
3986        );
3987    }
3988
3989    #[tokio::test]
3990    async fn load_diagnostics_for_files_skips_missing() {
3991        let dir = tempdir().unwrap();
3992        let index = empty_index(dir.path()).await;
3993        let upserts = vec![("a.rs".to_string(), vec![1]), ("c.rs".to_string(), vec![3])];
3994        index
3995            .save_diagnostics_per_file(&upserts, &[], "h1")
3996            .await
3997            .unwrap();
3998
3999        // Mix present + missing, in a non-canonical order.
4000        let query = vec![
4001            "c.rs".to_string(),
4002            "missing.rs".to_string(),
4003            "a.rs".to_string(),
4004        ];
4005        let mut got: Vec<(String, Vec<u8>)> = index
4006            .load_diagnostics_for_files(&query, "h1")
4007            .await
4008            .unwrap();
4009        got.sort_by(|x, y| x.0.cmp(&y.0));
4010        assert_eq!(
4011            got,
4012            vec![("a.rs".to_string(), vec![1]), ("c.rs".to_string(), vec![3]),]
4013        );
4014    }
4015
4016    #[tokio::test]
4017    async fn list_diagnostic_paths_returns_all() {
4018        let dir = tempdir().unwrap();
4019        let index = empty_index(dir.path()).await;
4020        let upserts = vec![
4021            ("x".to_string(), vec![0]),
4022            ("y".to_string(), vec![0]),
4023            ("z".to_string(), vec![0]),
4024        ];
4025        index
4026            .save_diagnostics_per_file(&upserts, &[], "h")
4027            .await
4028            .unwrap();
4029        let mut paths = index.list_diagnostic_paths().await.unwrap();
4030        paths.sort();
4031        assert_eq!(paths, vec!["x", "y", "z"]);
4032    }
4033
4034    /// Smoke test: a fresh open creates the per-file diagnostics table with the
4035    /// BLOB column type required by `save_diagnostics_per_file`. (A row inserted
4036    /// with the wrong column type by an older schema version would fail this
4037    /// roundtrip — the schema_version != SCHEMA_VERSION migration block at
4038    /// `FileIndex::open` is responsible for `DROP TABLE IF EXISTS
4039    /// daemon_diagnostics_per_file` so the new shape is created cleanly.)
4040    #[tokio::test]
4041    async fn fresh_open_per_file_table_accepts_blob_roundtrip() {
4042        let dir = tempdir().unwrap();
4043        let index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
4044            .await
4045            .unwrap();
4046        // The CREATE statement at FileIndex::open declares issues_blob BLOB NOT NULL.
4047        // Confirm the column type via PRAGMA table_info.
4048        let mut rows = index
4049            .conn
4050            .query("PRAGMA table_info(daemon_diagnostics_per_file)", ())
4051            .await
4052            .unwrap();
4053        let mut col_types: Vec<(String, String)> = Vec::new();
4054        while let Some(row) = rows.next().await.unwrap() {
4055            let name: String = row.get(1).unwrap();
4056            let ty: String = row.get(2).unwrap();
4057            col_types.push((name, ty));
4058        }
4059        let blob_col = col_types
4060            .iter()
4061            .find(|(n, _)| n == "issues_blob")
4062            .expect("issues_blob column missing");
4063        assert_eq!(
4064            blob_col.1.to_uppercase(),
4065            "BLOB",
4066            "issues_blob must be BLOB, got {:?}",
4067            blob_col.1
4068        );
4069
4070        // And the BLOB roundtrip itself works.
4071        index
4072            .save_diagnostics_per_file(&[("a".to_string(), vec![1, 2, 3])], &[], "h")
4073            .await
4074            .unwrap();
4075        assert_eq!(
4076            index.load_diagnostics_for_file("a", "h").await.unwrap(),
4077            Some(vec![1, 2, 3])
4078        );
4079    }
4080
4081    #[tokio::test]
4082    async fn invalidate_last_indexed_resets_needs_refresh_gate() {
4083        let dir = tempdir().unwrap();
4084        std::fs::write(dir.path().join("a.txt"), "x").unwrap();
4085        let mut index = FileIndex::open(&dir.path().join("index.sqlite"), dir.path())
4086            .await
4087            .unwrap();
4088        index.refresh().await.unwrap();
4089        // Just-after-refresh, the 60-second gate suppresses needs_refresh.
4090        assert!(!index.needs_refresh().await);
4091        index.invalidate_last_indexed().await.unwrap();
4092        assert!(index.needs_refresh().await);
4093    }
4094}