// rag_rat_core/index/mod.rs

1pub mod ai;
2pub mod anchors;
3pub mod chunker;
4pub mod edges;
5pub mod git_history;
6pub mod github;
7pub mod parser;
8pub mod schema;
9pub mod symbols;
10pub mod walker;
11
12#[cfg(test)]
13mod anchor_tests;
14#[cfg(test)]
15mod parser_tests;
16
17use std::{
18    collections::{BTreeMap, BTreeSet},
19    fs,
20    path::{Path, PathBuf},
21    process::Command,
22    sync::{
23        atomic::{AtomicUsize, Ordering},
24        mpsc,
25    },
26    thread,
27    thread::JoinHandle,
28    time::{SystemTime, UNIX_EPOCH},
29};
30
31use gix::{
32    bstr::{BString, ByteSlice},
33    status::{UntrackedFiles, tree_index},
34};
35use rayon::prelude::*;
36use regex::Regex;
37use rusqlite::{OptionalExtension, params};
38use serde::Serialize;
39use sha2::{Digest, Sha256};
40use thiserror::Error;
41
42use crate::{
43    config::{Config, TargetKind},
44    index::{
45        ai::{LocalAiStatus, ModelInfo, ReconcilePlan, ReconcileReport},
46        anchors::{AnchorStatus, ChunkAnchor},
47        chunker::Chunk,
48        git_history::{
49            ChunkBlameSummary, CommitSearchHit, GitHistoryIndexStatus, PathHistoryItem,
50            QueryCommitHit, SymbolHistoryItem,
51        },
52        github::{GitHubEvidence, GitHubStatus, GitHubSyncReport, Papertrail},
53        symbols::Symbol,
54    },
55    language::Language,
56    query::graph_meta::{self, GraphMetaMode},
57    search::lexical::{SearchHit, SearchOptions},
58    storage::IndexConnection,
59    storage::StorageStatus,
60};
61
62#[derive(Debug)]
63pub struct IndexDatabase {
64    storage: IndexConnection,
65    pub active_commit_sha: String,
66    pub active_worktree_id: String,
67}
68
69#[derive(Debug, Clone)]
70pub enum IndexProgress {
71    Started {
72        database: PathBuf,
73        mode: IndexMode,
74    },
75    Discovering,
76    Discovered {
77        files: usize,
78    },
79    PreparingFile {
80        current: usize,
81        total: usize,
82        path: PathBuf,
83        language: Language,
84        kind: TargetKind,
85    },
86    IndexingFile {
87        current: usize,
88        total: usize,
89        path: PathBuf,
90        language: Language,
91        kind: TargetKind,
92    },
93    IndexingGitHistory,
94    RebuildingLogicalSymbols,
95    ResolvingGraph,
96    SyncingFts,
97    RebuildingFts,
98    Finished {
99        files: usize,
100    },
101}
102
103#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
104#[serde(rename_all = "snake_case")]
105pub enum IndexMode {
106    Changed,
107    Discover,
108    Full,
109}
110
111impl IndexMode {
112    pub fn label(self) -> &'static str {
113        match self {
114            Self::Changed => "changed files",
115            Self::Discover => "discovery",
116            Self::Full => "full rebuild",
117        }
118    }
119}
120
121#[derive(Debug, Serialize)]
122pub struct IndexStatus {
123    pub database: String,
124    pub exists: bool,
125    pub schema: schema::SchemaStatus,
126    pub git_commit: Option<String>,
127    pub git_dirty: Option<bool>,
128    pub indexed_at_ms: Option<i64>,
129    pub content_revision: String,
130    pub fts_synced_at_ms: Option<i64>,
131    pub fts_source_revision: Option<String>,
132    pub fts_dirty: bool,
133    pub fts_fresh: bool,
134    pub file_count_by_language: BTreeMap<String, u64>,
135    pub parser_failures: u64,
136    pub parser_failure_paths: Vec<ParserFailure>,
137    pub git_history: GitHistoryIndexStatus,
138    pub github: GitHubStatus,
139    pub local_ai: LocalAiStatus,
140}
141
142#[derive(Debug, Serialize)]
143pub struct HealIndexReport {
144    pub checked_files: u64,
145    pub healed_files: u64,
146    pub removed_files: u64,
147    pub skipped_files: u64,
148    pub fts_fresh: bool,
149    pub message: Option<String>,
150}
151
152#[derive(Debug, Clone, Serialize)]
153pub struct GcReport {
154    pub files_pruned: u64,
155    pub chunks_pruned: u64,
156    pub files_remaining: u64,
157    pub chunks_remaining: u64,
158    /// True when no live context could be determined and pruning was skipped (nothing deleted).
159    pub skipped: bool,
160}
161
162#[derive(Debug, Serialize)]
163pub struct ParserFailure {
164    pub path: String,
165    pub language: String,
166    pub message: String,
167}
168
169#[derive(Debug, Serialize)]
170pub struct DiscoveryStatus {
171    pub discovered_files: usize,
172    pub indexed_files: usize,
173    pub unindexed_files: usize,
174    pub unindexed_source_files: usize,
175    pub changed_indexed_files: usize,
176    pub removed_indexed_files: usize,
177    pub unindexed_sample: Vec<String>,
178    pub warning: Option<String>,
179}
180
/// Cap on automatically healed files per call.
// NOTE(review): usage is outside this section; presumably the `cap` of `IndexError::NeedsReindex`.
const MAX_AUTO_HEAL_FILES_PER_CALL: usize = 4;
/// Version tag of the graph index format.
// NOTE(review): presumably compared by `ensure_graph_index_current`; confirm at the use site.
const GRAPH_INDEX_VERSION: &str = "6";
183
184#[derive(Debug, Error)]
185pub enum IndexError {
186    #[error("Gone: indexed chunk {chunk_id} no longer exists")]
187    Gone { chunk_id: i64 },
188    #[error("StaleChunk: chunk {chunk_id} in {path} could not be relocated after reindex")]
189    StaleChunk { chunk_id: i64, path: String },
190    #[error("needs_reindex: {stale_files} stale files exceeds automatic heal cap {cap}")]
191    NeedsReindex { stale_files: usize, cap: usize },
192}
193
194impl IndexDatabase {
195    pub fn open(path: &Path) -> anyhow::Result<Self> {
196        Self::open_with_graph_check(path, true)
197    }
198
199    pub fn database_path(&self) -> &Path {
200        self.storage.database_path()
201    }
202
203    fn open_with_graph_check(path: &Path, check_graph: bool) -> anyhow::Result<Self> {
204        let mut storage = IndexConnection::open(path)?;
205        schema::check_compatible(storage.connection())?;
206        ai::ensure_model_manifest(storage.connection())?;
207        if let Some(root) = meta_for(storage.connection(), "source_root")? {
208            storage.set_source_root(PathBuf::from(root));
209        }
210        let db =
211            Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() };
212        if check_graph {
213            db.ensure_graph_index_current()?;
214        }
215        Ok(db)
216    }
217
218    pub fn open_config(config: &Config) -> anyhow::Result<Self> {
219        let mut db = Self::open_with_graph_check(&config.database, false)?;
220        db.storage.set_source_root(config.root.clone());
221        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
222        db.set_context(&commit_sha, &worktree_id)?;
223        db.ensure_graph_index_current()?;
224        Ok(db)
225    }
226
227    pub fn migrate(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
228        Self::migrate_with_fastembed_cache(path, None)
229    }
230
231    fn migrate_with_fastembed_cache(
232        path: &Path,
233        fastembed_cache_dir: Option<&Path>,
234    ) -> anyhow::Result<schema::SchemaStatus> {
235        let storage = IndexConnection::open(path)?;
236        let status = schema::status(storage.connection())?;
237        match status.state {
238            schema::SchemaState::Newer | schema::SchemaState::Dirty => {
239                anyhow::bail!("{}", status.message);
240            },
241            schema::SchemaState::Compatible => {},
242            schema::SchemaState::Missing | schema::SchemaState::Older => {
243                schema::apply(storage.connection())?;
244            },
245        }
246        ai::ensure_model_manifest(storage.connection())?;
247        if let Some(fastembed_cache_dir) = fastembed_cache_dir {
248            ai::recover_cached_fastembed_model_from(storage.connection(), fastembed_cache_dir)?;
249        } else {
250            ai::recover_cached_fastembed_model(storage.connection())?;
251        }
252        schema::status(storage.connection())
253    }
254
255    pub fn migration_check(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
256        let storage = IndexConnection::open(path)?;
257        schema::status(storage.connection())
258    }
259
260    fn create_or_migrate(path: &Path) -> anyhow::Result<Self> {
261        let mut storage = IndexConnection::open(path)?;
262        schema::apply(storage.connection())?;
263        ai::ensure_model_manifest(storage.connection())?;
264        if let Some(root) = meta_for(storage.connection(), "source_root")? {
265            storage.set_source_root(PathBuf::from(root));
266        }
267        Ok(Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() })
268    }
269
270    pub fn set_context(&mut self, commit_sha: &str, worktree_id: &str) -> anyhow::Result<()> {
271        self.active_commit_sha = commit_sha.to_string();
272        self.active_worktree_id = worktree_id.to_string();
273
274        let conn = self.storage.connection();
275        conn.execute_batch(
276            "
277            CREATE TEMP TABLE IF NOT EXISTS connection_context(key TEXT PRIMARY KEY, value TEXT);
278        ",
279        )?;
280
281        let mut stmt = conn.prepare(
282            "INSERT OR REPLACE INTO temp.connection_context(key, value) VALUES (?1, ?2)",
283        )?;
284        stmt.execute(params!["commit_sha", commit_sha])?;
285        stmt.execute(params!["worktree_id", worktree_id])?;
286
287        conn.execute_batch("
288            DROP VIEW IF EXISTS temp.files;
289            CREATE TEMP VIEW temp.files AS
290            SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
291            FROM main.files
292            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id') AND worktree_id != '' AND kind != 'deleted'
293            UNION ALL
294            SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
295            FROM main.files
296            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
297              AND commit_sha != ''
298              AND path NOT IN (
299                  SELECT path FROM main.files 
300                  WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
301                    AND worktree_id != ''
302              );
303        ")?;
304
305        Ok(())
306    }
307
308    pub fn rebuild(config: &Config) -> anyhow::Result<Self> {
309        Self::rebuild_with_progress(config, |_| {})
310    }
311
312    pub fn rebuild_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
313    where
314        F: FnMut(IndexProgress),
315    {
316        progress(IndexProgress::Started {
317            database: config.database.clone(),
318            mode: IndexMode::Full,
319        });
320        let mut db = Self::create_or_migrate(&config.database)?;
321        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
322        db.set_context(&commit_sha, &worktree_id)?;
323        progress(IndexProgress::IndexingGitHistory);
324        let mut git_history = Some(spawn_git_history_prepare(&config.root));
325        // RAM-first bulk build: a full rebuild is one big atomic write, so skip per-commit fsyncs
326        // (synchronous=OFF) and give SQLite a large page cache. Restored to NORMAL after the
327        // rebuild. Only `rebuild` uses this; incremental indexing and the watcher stay durable.
328        //
329        // NB: stay in WAL — switching journal_mode needs an EXCLUSIVE database lock, which fails
330        // ("database is locked") whenever another connection is open (e.g. the watcher, or a
331        // concurrent reader). `synchronous` and `cache_size` are per-connection and safe under
332        // concurrency. Also do NOT touch `temp_store` — changing it drops the connection_context
333        // overlay temp table created by `set_context` above.
334        db.storage.execute_batch(
335            "PRAGMA synchronous = OFF;
336             PRAGMA cache_size = -262144;",
337        )?;
338        let result = (|| -> anyhow::Result<()> {
339            db.storage.execute_batch("BEGIN TRANSACTION")?;
340            db.clear_full_rebuild_tables()?;
341            db.set_meta("source_root", &config.root.display().to_string())?;
342            db.storage.set_source_root(config.root.clone());
343            db.write_git_meta(&config.root)?;
344            let indexed = db.index_targets_with_progress(config, &mut progress)?;
345            db.apply_prepared_git_history(
346                &config.root,
347                git_history
348                    .take()
349                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
350            )?;
351            progress(IndexProgress::RebuildingLogicalSymbols);
352            db.rebuild_logical_symbols()?;
353            progress(IndexProgress::ResolvingGraph);
354            db.resolve_edges()?;
355            db.mark_graph_index_current()?;
356            progress(IndexProgress::RebuildingFts);
357            db.rebuild_fts()?;
358            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
359            db.storage.execute_batch("COMMIT")?;
360            progress(IndexProgress::Finished { files: indexed });
361            Ok(())
362        })();
363        if result.is_err() {
364            if let Some(handle) = git_history.take() {
365                let _ = join_git_history_prepare(handle);
366            }
367            let _ = db.storage.execute_batch("ROLLBACK");
368        }
369        // Restore durable fsync behavior for any later writes on this connection (reconcile, etc.).
370        // cache_size is left bumped — harmless for the short remaining lifetime of the connection.
371        let _ = db.storage.execute_batch("PRAGMA synchronous = NORMAL;");
372        result?;
373        Ok(db)
374    }
375
376    fn clear_full_rebuild_tables(&self) -> anyhow::Result<()> {
377        // Stage the active context's file ids, then cascade-delete them and their derived rows.
378        self.storage.execute_batch(
379            "
380            CREATE TEMP TABLE IF NOT EXISTS staged_file_ids(id INTEGER PRIMARY KEY);
381            DELETE FROM temp.staged_file_ids;
382            INSERT OR IGNORE INTO temp.staged_file_ids(id)
383            SELECT id
384            FROM main.files
385            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
386              AND worktree_id != '';
387            INSERT OR IGNORE INTO temp.staged_file_ids(id)
388            SELECT id
389            FROM main.files
390            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
391              AND commit_sha != ''
392              AND path NOT IN (
393                  SELECT path FROM main.files
394                  WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
395                    AND worktree_id != ''
396              );
397            ",
398        )?;
399        self.delete_staged_files_cascade()?;
400        self.storage.execute_batch("DELETE FROM temp.staged_file_ids;")?;
401        Ok(())
402    }
403
404    /// Cascade-delete every derived row (edges, symbols, chunks, embeddings, FTS, blame, docs,
405    /// parser failures) for the file ids staged in `temp.staged_file_ids`, then the files
406    /// themselves. The caller is responsible for populating and clearing the temp table.
407    /// Shared by full rebuild (active context) and GC (dead, non-live contexts).
408    fn delete_staged_files_cascade(&self) -> anyhow::Result<()> {
409        self.storage.execute_batch(
410            "
411            UPDATE main.edges
412            SET to_symbol_id = NULL,
413                target_start_line = NULL,
414                target_end_line = NULL,
415                resolution = 'unresolved'
416            WHERE to_symbol_id IN (
417                SELECT symbols.id
418                FROM main.symbols
419                JOIN temp.staged_file_ids ON staged_file_ids.id = symbols.file_id
420            );
421            DELETE FROM main.edges
422            WHERE source_file_id IN (SELECT id FROM temp.staged_file_ids)
423               OR from_symbol_id IN (
424                    SELECT symbols.id
425                    FROM main.symbols
426                    JOIN temp.staged_file_ids ON staged_file_ids.id = symbols.file_id
427               );
428
429            DELETE FROM main.logical_symbol_members
430            WHERE symbol_id IN (
431                SELECT symbols.id
432                FROM main.symbols
433                JOIN temp.staged_file_ids ON staged_file_ids.id = symbols.file_id
434            );
435            DELETE FROM main.logical_symbols
436            WHERE id NOT IN (
437                SELECT logical_symbol_id FROM main.logical_symbol_members
438            );
439            DELETE FROM main.symbol_facts
440            WHERE symbol_id IN (
441                SELECT symbols.id
442                FROM main.symbols
443                JOIN temp.staged_file_ids ON staged_file_ids.id = symbols.file_id
444            );
445            DELETE FROM main.chunk_fts
446            WHERE rowid IN (
447                SELECT chunks.id
448                FROM main.chunks
449                JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
450            );
451            DELETE FROM main.chunk_summaries
452            WHERE chunk_id IN (
453                SELECT chunks.id
454                FROM main.chunks
455                JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
456            );
457            DELETE FROM main.chunk_embeddings
458            WHERE chunk_id IN (
459                SELECT chunks.id
460                FROM main.chunks
461                JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
462            );
463            DELETE FROM main.git_chunk_blame
464            WHERE chunk_id IN (
465                SELECT chunks.id
466                FROM main.chunks
467                JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
468            );
469            DELETE FROM main.docs
470            WHERE chunk_id IN (
471                SELECT chunks.id
472                FROM main.chunks
473                JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
474            );
475            DELETE FROM main.parser_failures
476            WHERE path IN (
477                SELECT path
478                FROM main.files
479                JOIN temp.staged_file_ids ON staged_file_ids.id = files.id
480            );
481            DELETE FROM main.symbols
482            WHERE file_id IN (SELECT id FROM temp.staged_file_ids);
483            DELETE FROM main.chunks
484            WHERE file_id IN (SELECT id FROM temp.staged_file_ids);
485            DELETE FROM main.files
486            WHERE id IN (SELECT id FROM temp.staged_file_ids);
487            ",
488        )?;
489        Ok(())
490    }
491
492    pub fn index_changed(config: &Config) -> anyhow::Result<Self> {
493        Self::index_changed_with_progress(config, |_| {})
494    }
495
496    pub fn index_changed_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
497    where
498        F: FnMut(IndexProgress),
499    {
500        Self::index_incremental_with_progress(config, IndexMode::Changed, &mut progress)
501    }
502
503    pub fn index_discover(config: &Config) -> anyhow::Result<Self> {
504        Self::index_discover_with_progress(config, |_| {})
505    }
506
507    pub fn index_discover_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
508    where
509        F: FnMut(IndexProgress),
510    {
511        Self::index_incremental_with_progress(config, IndexMode::Discover, &mut progress)
512    }
513
514    fn index_incremental_with_progress<F>(
515        config: &Config,
516        mode: IndexMode,
517        progress: &mut F,
518    ) -> anyhow::Result<Self>
519    where
520        F: FnMut(IndexProgress),
521    {
522        if !config.database.exists() {
523            return Self::rebuild_with_progress(config, progress);
524        }
525        if Self::migration_check(&config.database)?.state == schema::SchemaState::Missing {
526            return Self::rebuild_with_progress(config, progress);
527        }
528
529        let mut db = Self::open(&config.database)?;
530        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
531        db.set_context(&commit_sha, &worktree_id)?;
532        if db.indexed_file_count()? == 0 {
533            return Self::rebuild_with_progress(config, progress);
534        }
535        progress(IndexProgress::Started { database: config.database.clone(), mode });
536        progress(IndexProgress::IndexingGitHistory);
537        let mut git_history = Some(spawn_git_history_prepare(&config.root));
538        let result = (|| -> anyhow::Result<()> {
539            db.storage.execute_batch("BEGIN TRANSACTION")?;
540            db.set_meta("source_root", &config.root.display().to_string())?;
541            db.storage.set_source_root(config.root.clone());
542            db.write_git_meta(&config.root)?;
543            let indexed = match mode {
544                IndexMode::Changed => db.index_changed_files_with_progress(config, progress)?,
545                IndexMode::Discover => db.index_discovered_files_with_progress(config, progress)?,
546                IndexMode::Full => unreachable!("full mode is handled by rebuild_with_progress"),
547            };
548            db.apply_prepared_git_history(
549                &config.root,
550                git_history
551                    .take()
552                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
553            )?;
554            if indexed > 0 {
555                progress(IndexProgress::RebuildingLogicalSymbols);
556                db.rebuild_logical_symbols()?;
557                progress(IndexProgress::ResolvingGraph);
558                db.resolve_edges()?;
559                db.mark_graph_index_current()?;
560                progress(IndexProgress::SyncingFts);
561                db.sync_fts()?;
562            }
563            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
564            db.storage.execute_batch("COMMIT")?;
565            progress(IndexProgress::Finished { files: indexed });
566            Ok(())
567        })();
568        if result.is_err() {
569            if let Some(handle) = git_history.take() {
570                let _ = join_git_history_prepare(handle);
571            }
572            let _ = db.storage.execute_batch("ROLLBACK");
573        }
574        result?;
575        Ok(db)
576    }
577
578    pub fn index_targets(&self, config: &Config) -> anyhow::Result<()> {
579        self.index_targets_with_progress(config, &mut |_| {})?;
580        Ok(())
581    }
582
583    fn index_targets_with_progress<F>(
584        &self,
585        config: &Config,
586        progress: &mut F,
587    ) -> anyhow::Result<usize>
588    where
589        F: FnMut(IndexProgress),
590    {
591        progress(IndexProgress::Discovering);
592        let files = collect_index_files(config)?;
593        let changes = git_changed_paths(&config.root).unwrap_or_default();
594        let files = self.assign_file_scopes(files, &changes);
595        progress(IndexProgress::Discovered { files: files.len() });
596
597        let prepared = prepare_files_with_progress(&files, progress)?;
598        for (index, prepared_file) in prepared.iter().enumerate() {
599            let current = index + 1;
600            if should_report_file_progress(current, files.len()) {
601                progress(IndexProgress::IndexingFile {
602                    current,
603                    total: files.len(),
604                    path: prepared_file.file.relative_path.clone(),
605                    language: prepared_file.file.language,
606                    kind: prepared_file.file.kind,
607                });
608            }
609            self.insert_prepared_file(prepared_file)?;
610        }
611
612        Ok(files.len())
613    }
614
615    fn index_changed_files_with_progress<F>(
616        &self,
617        config: &Config,
618        progress: &mut F,
619    ) -> anyhow::Result<usize>
620    where
621        F: FnMut(IndexProgress),
622    {
623        progress(IndexProgress::Discovering);
624        let changes = git_changed_paths(&config.root)?;
625        let files = collect_changed_index_files(config, &changes)?;
626        let files = self.assign_file_scopes(files, &changes);
627        self.apply_incremental_file_plan(files, changes.deleted, progress)
628    }
629
630    fn index_discovered_files_with_progress<F>(
631        &self,
632        config: &Config,
633        progress: &mut F,
634    ) -> anyhow::Result<usize>
635    where
636        F: FnMut(IndexProgress),
637    {
638        progress(IndexProgress::Discovering);
639        let plan = discovery_plan(self.storage.connection(), config)?;
640        let changes = git_changed_paths(&config.root).unwrap_or_default();
641        let files = self.assign_file_scopes(plan.files, &changes);
642        self.apply_incremental_file_plan(files, plan.deleted, progress)
643    }
644
645    fn assign_file_scopes(
646        &self,
647        files: Vec<IndexFile>,
648        changes: &GitChangedPaths,
649    ) -> Vec<IndexFile> {
650        let has_base_commit = !self.active_commit_sha.is_empty();
651        files
652            .into_iter()
653            .map(|mut file| {
654                if !has_base_commit || changes.changed.contains(&file.relative_path) {
655                    file.commit_sha.clear();
656                    file.worktree_id.clone_from(&self.active_worktree_id);
657                } else {
658                    file.commit_sha.clone_from(&self.active_commit_sha);
659                    file.worktree_id.clear();
660                }
661                file
662            })
663            .collect()
664    }
665
666    fn apply_incremental_file_plan<F>(
667        &self,
668        files: Vec<IndexFile>,
669        deleted: BTreeSet<PathBuf>,
670        progress: &mut F,
671    ) -> anyhow::Result<usize>
672    where
673        F: FnMut(IndexProgress),
674    {
675        progress(IndexProgress::Discovered { files: files.len() });
676
677        let deleted_count = deleted.len();
678        for path in deleted {
679            self.mark_file_deleted(&path)?;
680        }
681
682        let prepared = prepare_files_with_progress(&files, progress)?;
683        for (index, prepared_file) in prepared.iter().enumerate() {
684            let current = index + 1;
685            if should_report_file_progress(current, files.len()) {
686                progress(IndexProgress::IndexingFile {
687                    current,
688                    total: files.len(),
689                    path: prepared_file.file.relative_path.clone(),
690                    language: prepared_file.file.language,
691                    kind: prepared_file.file.kind,
692                });
693            }
694            self.remove_file_in_scope(
695                &prepared_file.file.relative_path,
696                &prepared_file.file.commit_sha,
697                &prepared_file.file.worktree_id,
698            )?;
699            self.insert_prepared_file(prepared_file)?;
700        }
701
702        Ok(files.len() + deleted_count)
703    }
704
705    pub fn status(&self, database: &Path) -> anyhow::Result<IndexStatus> {
706        let mut counts = BTreeMap::new();
707        let mut stmt = self
708            .storage
709            .connection()
710            .prepare("SELECT language, COUNT(*) FROM files GROUP BY language ORDER BY language")?;
711        let rows =
712            stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)))?;
713        for row in rows {
714            let (language, count) = row?;
715            counts.insert(language, u64::try_from(count).unwrap_or(0));
716        }
717
718        let content_revision = self.content_revision()?;
719        let fts_source_revision = self.meta("fts_source_revision")?;
720        let fts_dirty = self.fts_dirty()?;
721
722        Ok(IndexStatus {
723            database: database.display().to_string(),
724            exists: database.exists(),
725            schema: schema::status(self.storage.connection())?,
726            git_commit: self.meta("git_commit")?,
727            git_dirty: self.meta("git_dirty")?.map(|value| value == "true"),
728            indexed_at_ms: self.meta("indexed_at_ms")?.and_then(|value| value.parse::<i64>().ok()),
729            content_revision: content_revision.clone(),
730            fts_synced_at_ms: self
731                .meta("fts_synced_at_ms")?
732                .and_then(|value| value.parse::<i64>().ok()),
733            fts_dirty,
734            fts_fresh: !fts_dirty
735                && fts_source_revision.as_deref() == Some(content_revision.as_str()),
736            fts_source_revision,
737            file_count_by_language: counts,
738            parser_failures: self.parser_failure_count()?,
739            parser_failure_paths: self.parser_failure_paths()?,
740            git_history: self.git_history_status()?,
741            github: self.github_status()?,
742            local_ai: self.local_ai_status()?,
743        })
744    }
745
746    pub fn storage_status(&self) -> anyhow::Result<StorageStatus> {
747        self.storage.status()
748    }
749
750    pub fn discovery_status(&self, config: &Config) -> anyhow::Result<DiscoveryStatus> {
751        let plan = discovery_plan(self.storage.connection(), config)?;
752        let unindexed_source_files =
753            plan.unindexed.iter().filter(|file| file.kind == TargetKind::Source).count();
754        let unindexed_sample =
755            plan.unindexed.iter().take(10).map(|file| path_string(&file.relative_path)).collect();
756        let warning = (unindexed_source_files > 0).then(|| {
757            format!(
758                "{unindexed_source_files} unindexed source files detected. Run `rag-rat index --full` or `rag-rat index --discover`."
759            )
760        });
761        Ok(DiscoveryStatus {
762            discovered_files: plan.discovered_files,
763            indexed_files: plan.indexed_files,
764            unindexed_files: plan.unindexed.len(),
765            unindexed_source_files,
766            changed_indexed_files: plan.changed.len(),
767            removed_indexed_files: plan.deleted.len(),
768            unindexed_sample,
769            warning,
770        })
771    }
772
773    pub fn search(
774        &self,
775        query: &str,
776        limit: u32,
777        include_generated: bool,
778    ) -> anyhow::Result<Vec<SearchHit>> {
779        self.search_with_graph_meta(query, limit, include_generated, GraphMetaMode::Compact, 3)
780    }
781
782    pub fn search_explain(
783        &self,
784        query: &str,
785        limit: u32,
786        include_generated: bool,
787    ) -> anyhow::Result<Vec<SearchHit>> {
788        self.search_explain_with_graph_meta(
789            query,
790            limit,
791            include_generated,
792            GraphMetaMode::Compact,
793            3,
794        )
795    }
796
797    pub fn search_with_graph_meta(
798        &self,
799        query: &str,
800        limit: u32,
801        include_generated: bool,
802        graph_mode: GraphMetaMode,
803        graph_limit: u32,
804    ) -> anyhow::Result<Vec<SearchHit>> {
805        self.search_with_graph_meta_options(
806            query,
807            limit,
808            include_generated,
809            graph_mode,
810            graph_limit,
811            SearchOptions::default(),
812        )
813    }
814
815    pub fn search_with_graph_meta_options(
816        &self,
817        query: &str,
818        limit: u32,
819        include_generated: bool,
820        graph_mode: GraphMetaMode,
821        graph_limit: u32,
822        options: SearchOptions,
823    ) -> anyhow::Result<Vec<SearchHit>> {
824        self.ensure_fts_fresh()?;
825        let mut hits =
826            self.search_with_heal(query, limit, include_generated, true, false, options)?;
827        graph_meta::attach_to_search_hits(
828            self.storage.connection(),
829            &mut hits,
830            graph_mode,
831            graph_limit,
832        )?;
833        Ok(hits)
834    }
835
836    pub fn search_explain_with_graph_meta(
837        &self,
838        query: &str,
839        limit: u32,
840        include_generated: bool,
841        graph_mode: GraphMetaMode,
842        graph_limit: u32,
843    ) -> anyhow::Result<Vec<SearchHit>> {
844        self.search_explain_with_graph_meta_options(
845            query,
846            limit,
847            include_generated,
848            graph_mode,
849            graph_limit,
850            SearchOptions::default(),
851        )
852    }
853
854    pub fn search_explain_with_graph_meta_options(
855        &self,
856        query: &str,
857        limit: u32,
858        include_generated: bool,
859        graph_mode: GraphMetaMode,
860        graph_limit: u32,
861        options: SearchOptions,
862    ) -> anyhow::Result<Vec<SearchHit>> {
863        self.ensure_fts_fresh()?;
864        let mut hits =
865            self.search_with_heal(query, limit, include_generated, true, true, options)?;
866        graph_meta::attach_to_search_hits(
867            self.storage.connection(),
868            &mut hits,
869            graph_mode,
870            graph_limit,
871        )?;
872        Ok(hits)
873    }
874
875    pub fn symbols(
876        &self,
877        name: &str,
878        language: Option<Language>,
879        limit: u32,
880    ) -> anyhow::Result<Vec<crate::query::symbol::SymbolHit>> {
881        crate::query::symbol::lookup(self.storage.connection(), name, language, limit)
882    }
883
884    pub fn symbol_candidates(
885        &self,
886        selector: &crate::query::symbol::SymbolSelector,
887    ) -> anyhow::Result<crate::query::symbol::SymbolLookup> {
888        crate::query::symbol::lookup_candidates(self.storage.connection(), selector)
889    }
890
891    pub fn select_symbol(
892        &self,
893        selector: &crate::query::symbol::SymbolSelector,
894    ) -> anyhow::Result<
895        Result<Option<crate::query::symbol::SymbolHit>, crate::query::symbol::SymbolDisambiguation>,
896    > {
897        crate::query::symbol::select_one(self.storage.connection(), selector)
898    }
899
900    pub fn read_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
901        self.read_chunk_with_graph_and_memories(chunk_id, GraphMetaMode::Full, 20, true)
902    }
903
904    pub fn read_chunk_with_graph(
905        &self,
906        chunk_id: i64,
907        graph_mode: GraphMetaMode,
908        graph_limit: u32,
909    ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
910        self.read_chunk_with_graph_and_memories(chunk_id, graph_mode, graph_limit, false)
911    }
912
913    pub fn read_chunk_with_graph_and_memories(
914        &self,
915        chunk_id: i64,
916        graph_mode: GraphMetaMode,
917        graph_limit: u32,
918        include_memories: bool,
919    ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
920        let Some(mut chunk) = self.read_chunk_current(chunk_id)? else {
921            return Ok(None);
922        };
923        graph_meta::attach_to_read_chunk(
924            self.storage.connection(),
925            &mut chunk,
926            graph_mode,
927            graph_limit,
928        )?;
929        if include_memories {
930            chunk.memories =
931                crate::query::memory::memories_for_chunk(self.storage.connection(), chunk_id, 20)?;
932        }
933        Ok(Some(chunk))
934    }
935
936    fn read_chunk_current(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
937        let Some(mut chunk) = crate::query::read_chunk(self.storage.connection(), chunk_id)? else {
938            return Ok(None);
939        };
940        let Some(root) = self.storage.source_root() else {
941            return Ok(Some(chunk));
942        };
943        let source_path = root.join(&chunk.path);
944        let current_text = match fs::read_to_string(&source_path) {
945            Ok(text) => text,
946            Err(_) => {
947                let path = chunk.path.clone();
948                self.mark_file_deleted(Path::new(&path))?;
949                self.sync_fts()?;
950                anyhow::bail!(IndexError::Gone { chunk_id });
951            },
952        };
953        let anchor = self.chunk_anchor(chunk_id)?;
954        let status = anchors::validate(
955            &chunk.text,
956            usize::try_from(chunk.start_line).unwrap_or(1),
957            usize::try_from(chunk.end_line).unwrap_or(1),
958            &anchor,
959            &current_text,
960        );
961        match status {
962            AnchorStatus::Exact => {
963                if let Some(text) = anchors::slice_lines(
964                    &current_text,
965                    usize::try_from(chunk.start_line).unwrap_or(1),
966                    usize::try_from(chunk.end_line).unwrap_or(1),
967                ) {
968                    chunk.text = text;
969                }
970                Ok(Some(chunk))
971            },
972            AnchorStatus::Relocated { start_line, end_line, text } => {
973                chunk.start_line = i64::try_from(start_line)?;
974                chunk.end_line = i64::try_from(end_line)?;
975                chunk.text = text;
976                Ok(Some(chunk))
977            },
978            AnchorStatus::Stale => {
979                self.heal_file(Path::new(&chunk.path))?;
980                self.sync_fts()?;
981                let healed = crate::query::read_chunk(self.storage.connection(), chunk_id)?;
982                match healed {
983                    Some(chunk) => Ok(Some(chunk)),
984                    None => anyhow::bail!(IndexError::StaleChunk { chunk_id, path: chunk.path }),
985                }
986            },
987        }
988    }
989
990    pub fn search_hash_baseline(
991        &self,
992        query: &str,
993        limit: u32,
994        include_generated: bool,
995    ) -> anyhow::Result<Vec<SearchHit>> {
996        self.ensure_fts_fresh()?;
997        crate::search::lexical::search_hash_baseline(
998            self.storage.connection(),
999            query,
1000            limit,
1001            include_generated,
1002        )
1003    }
1004
1005    pub fn docs_for_symbol(&self, symbol: &str, limit: u32) -> anyhow::Result<Vec<SearchHit>> {
1006        self.search(symbol, limit, true)
1007    }
1008
1009    pub fn docs_for_selected_symbol(
1010        &self,
1011        symbol: &crate::query::symbol::SymbolHit,
1012        limit: u32,
1013    ) -> anyhow::Result<Vec<SearchHit>> {
1014        let mut hits = self.local_symbol_context_hits(symbol, limit)?;
1015        hits.extend(self.search(&symbol.name, limit.saturating_mul(4).max(limit), true)?);
1016        rank_docs_for_symbol(symbol, &mut hits);
1017        dedupe_search_hits(&mut hits);
1018        hits.truncate(usize::try_from(limit).unwrap_or(usize::MAX));
1019        Ok(hits)
1020    }
1021
1022    pub fn commit_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<CommitSearchHit>> {
1023        git_history::commit_search(self.storage.connection(), query, limit)
1024    }
1025
1026    pub fn git_history_for_path(
1027        &self,
1028        path: &str,
1029        limit: u32,
1030    ) -> anyhow::Result<Vec<PathHistoryItem>> {
1031        git_history::history_for_path(self.storage.connection(), path, limit)
1032    }
1033
1034    pub fn git_history_for_symbol(
1035        &self,
1036        symbol: &str,
1037        language: Option<Language>,
1038        limit: u32,
1039    ) -> anyhow::Result<Vec<SymbolHistoryItem>> {
1040        let symbols = self.symbols(symbol, language, limit)?;
1041        let per_symbol_limit = limit.max(1);
1042        let mut out = Vec::new();
1043        for symbol_hit in symbols {
1044            for commit in self.git_history_for_path(&symbol_hit.path, per_symbol_limit)? {
1045                out.push(SymbolHistoryItem {
1046                    symbol: symbol_hit.name.clone(),
1047                    qualified_name: symbol_hit.qualified_name.clone(),
1048                    path: symbol_hit.path.clone(),
1049                    start_byte: symbol_hit.start_byte,
1050                    end_byte: symbol_hit.end_byte,
1051                    commit,
1052                    evidence_kind: "historical",
1053                });
1054                if out.len() >= usize::try_from(limit).unwrap_or(usize::MAX) {
1055                    return Ok(out);
1056                }
1057            }
1058        }
1059        Ok(out)
1060    }
1061
1062    pub fn commits_touching_query(
1063        &self,
1064        query: &str,
1065        limit: u32,
1066    ) -> anyhow::Result<Vec<QueryCommitHit>> {
1067        let current_hits = self.search(query, limit, true)?;
1068        git_history::commits_touching_query(self.storage.connection(), query, limit, &current_hits)
1069    }
1070
1071    pub fn git_blame_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<ChunkBlameSummary>> {
1072        let Some(chunk) = self.read_chunk(chunk_id)? else {
1073            return Ok(None);
1074        };
1075        let source_text_hash = git_history::source_text_hash(&chunk.text);
1076        if let Some(cached) =
1077            git_history::cached_blame(self.storage.connection(), chunk_id, &source_text_hash)?
1078        {
1079            return Ok(Some(cached));
1080        }
1081        let Some(root) = self.storage.source_root() else {
1082            return Ok(Some(ChunkBlameSummary {
1083                chunk_id,
1084                path: chunk.path,
1085                start_line: chunk.start_line,
1086                end_line: chunk.end_line,
1087                source_text_hash,
1088                line_count: 0,
1089                dominant_commit: None,
1090                dominant_commit_lines: 0,
1091                newest_commit: None,
1092                newest_commit_time_s: None,
1093                oldest_commit: None,
1094                oldest_commit_time_s: None,
1095                commit_counts: BTreeMap::new(),
1096                evidence_kind: "historical",
1097            }));
1098        };
1099        let blame_lines =
1100            git_history::blame_lines(root, &chunk.path, chunk.start_line, chunk.end_line);
1101        let mut counts = BTreeMap::<String, i64>::new();
1102        let mut newest = None::<(String, i64)>;
1103        let mut oldest = None::<(String, i64)>;
1104        for line in &blame_lines {
1105            *counts.entry(line.commit.clone()).or_default() += 1;
1106            if let Some(time) = line.author_time_s {
1107                if newest.as_ref().is_none_or(|(_, newest_time)| time > *newest_time) {
1108                    newest = Some((line.commit.clone(), time));
1109                }
1110                if oldest.as_ref().is_none_or(|(_, oldest_time)| time < *oldest_time) {
1111                    oldest = Some((line.commit.clone(), time));
1112                }
1113            }
1114        }
1115        let dominant = counts
1116            .iter()
1117            .max_by_key(|(commit, count)| (*count, *commit))
1118            .map(|(commit, count)| (commit.clone(), *count));
1119        let summary = ChunkBlameSummary {
1120            chunk_id,
1121            path: chunk.path,
1122            start_line: chunk.start_line,
1123            end_line: chunk.end_line,
1124            source_text_hash,
1125            line_count: i64::try_from(blame_lines.len()).unwrap_or(i64::MAX),
1126            dominant_commit: dominant.as_ref().map(|(commit, _)| commit.clone()),
1127            dominant_commit_lines: dominant.map(|(_, count)| count).unwrap_or(0),
1128            newest_commit: newest.as_ref().map(|(commit, _)| commit.clone()),
1129            newest_commit_time_s: newest.as_ref().map(|(_, time)| *time),
1130            oldest_commit: oldest.as_ref().map(|(commit, _)| commit.clone()),
1131            oldest_commit_time_s: oldest.as_ref().map(|(_, time)| *time),
1132            commit_counts: counts,
1133            evidence_kind: "historical",
1134        };
1135        git_history::store_blame(self.storage.connection(), &summary)?;
1136        Ok(Some(summary))
1137    }
1138
1139    pub fn github_sync_from_refs(&self, offline: bool) -> anyhow::Result<GitHubSyncReport> {
1140        self.github_sync_from_refs_with_progress(offline, |_| {})
1141    }
1142
1143    pub fn github_sync_from_refs_with_progress(
1144        &self,
1145        offline: bool,
1146        progress: impl FnMut(github::GitHubSyncProgress),
1147    ) -> anyhow::Result<GitHubSyncReport> {
1148        let Some(root) = self.storage.source_root() else {
1149            anyhow::bail!("index has no source_root metadata; rebuild required");
1150        };
1151        if offline {
1152            github::sync_from_refs::<github::GhCliGitHubClient>(
1153                self.storage.connection(),
1154                root,
1155                None,
1156                true,
1157            )
1158        } else {
1159            let client = github::GhCliGitHubClient;
1160            github::sync_from_refs_with_progress(
1161                self.storage.connection(),
1162                root,
1163                Some(&client),
1164                false,
1165                progress,
1166            )
1167        }
1168    }
1169
1170    pub fn github_sync_issue(
1171        &self,
1172        issue_ref: &str,
1173        offline: bool,
1174    ) -> anyhow::Result<GitHubSyncReport> {
1175        if offline {
1176            github::sync_issue::<github::GhCliGitHubClient>(
1177                self.storage.connection(),
1178                issue_ref,
1179                None,
1180                true,
1181            )
1182        } else {
1183            let client = github::GhCliGitHubClient;
1184            github::sync_issue(self.storage.connection(), issue_ref, Some(&client), false)
1185        }
1186    }
1187
1188    pub fn github_issue_search(
1189        &self,
1190        query: &str,
1191        limit: u32,
1192    ) -> anyhow::Result<Vec<GitHubEvidence>> {
1193        github::issue_search(self.storage.connection(), query, limit)
1194    }
1195
1196    pub fn rationale_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<GitHubEvidence>> {
1197        github::rationale_search(self.storage.connection(), query, limit)
1198    }
1199
1200    pub fn github_refs_for_path(
1201        &self,
1202        path: &str,
1203        limit: u32,
1204    ) -> anyhow::Result<Vec<github::GitHubRef>> {
1205        github::refs_for_path(self.storage.connection(), path, limit)
1206    }
1207
1208    pub fn github_sync_status(&self) -> anyhow::Result<GitHubStatus> {
1209        self.github_status()
1210    }
1211
1212    pub fn papertrail_for_chunk(
1213        &self,
1214        chunk_id: i64,
1215        limit: u32,
1216    ) -> anyhow::Result<Option<Papertrail>> {
1217        let Some(chunk) = self.read_chunk(chunk_id)? else {
1218            return Ok(None);
1219        };
1220        Ok(Some(github::papertrail_for_chunk(self.storage.connection(), &chunk, limit)?))
1221    }
1222
1223    pub fn papertrail_for_symbol(
1224        &self,
1225        symbol: &str,
1226        language: Option<Language>,
1227        limit: u32,
1228    ) -> anyhow::Result<Option<Papertrail>> {
1229        let Some(symbol) = self.symbols(symbol, language, limit)?.into_iter().next() else {
1230            return Ok(None);
1231        };
1232        Ok(Some(github::papertrail_for_symbol(self.storage.connection(), &symbol, limit)?))
1233    }
1234
1235    pub fn papertrail_for_selected_symbol(
1236        &self,
1237        symbol: &crate::query::symbol::SymbolHit,
1238        limit: u32,
1239    ) -> anyhow::Result<Papertrail> {
1240        github::papertrail_for_symbol(self.storage.connection(), symbol, limit)
1241    }
1242
1243    pub fn papertrail_for_commit(
1244        &self,
1245        commit_hash: &str,
1246        limit: u32,
1247    ) -> anyhow::Result<Papertrail> {
1248        github::papertrail_for_commit(self.storage.connection(), commit_hash, limit)
1249    }
1250
1251    pub fn local_ai_status(&self) -> anyhow::Result<LocalAiStatus> {
1252        ai::status(self.storage.connection())
1253    }
1254
1255    pub fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
1256        ai::models(self.storage.connection())
1257    }
1258
1259    pub fn install_model(&self, model_id: &str) -> anyhow::Result<ModelInfo> {
1260        ai::install_model(self.storage.connection(), model_id)
1261    }
1262
1263    pub fn reconcile(
1264        &self,
1265        limit: Option<u32>,
1266        batch_size: Option<u32>,
1267    ) -> anyhow::Result<ReconcileReport> {
1268        ai::reconcile(self.storage.connection(), limit, batch_size)
1269    }
1270
1271    pub fn reconcile_plan(&self) -> anyhow::Result<ReconcilePlan> {
1272        ai::reconcile_plan(self.storage.connection())
1273    }
1274
1275    pub fn reconcile_with_progress(
1276        &self,
1277        limit: Option<u32>,
1278        batch_size: Option<u32>,
1279        force: bool,
1280        progress: impl FnMut(ai::ReconcileProgress),
1281    ) -> anyhow::Result<ReconcileReport> {
1282        ai::reconcile_with_progress(self.storage.connection(), limit, batch_size, force, progress)
1283    }
1284
1285    pub fn reconcile_with_options_progress(
1286        &self,
1287        options: ai::ReconcileOptions,
1288        progress: impl FnMut(ai::ReconcileProgress),
1289    ) -> anyhow::Result<ReconcileReport> {
1290        ai::reconcile_with_options_progress(self.storage.connection(), options, progress)
1291    }
1292
1293    /// Garbage-collect index rows for git contexts that are no longer live. Keeps the active
1294    /// commit and overlay of every worktree reported by `git worktree list` (plus this
1295    /// connection's active context) and prunes file/chunk/embedding/symbol/edge rows for any
1296    /// other commit. Never prunes when no live context can be determined (non-git, git error).
1297    pub fn gc(&self) -> anyhow::Result<GcReport> {
1298        let mut live_commits = Vec::new();
1299        let mut live_worktrees = Vec::new();
1300        if let Some(root) = self.storage.source_root() {
1301            let (commits, worktrees) = live_worktree_contexts(root);
1302            live_commits.extend(commits);
1303            live_worktrees.extend(worktrees);
1304        }
1305        // Always keep this connection's active context, even if git enumeration missed it.
1306        if !self.active_commit_sha.is_empty() {
1307            live_commits.push(self.active_commit_sha.clone());
1308        }
1309        if !self.active_worktree_id.is_empty() {
1310            live_worktrees.push(self.active_worktree_id.clone());
1311        }
1312        live_commits.sort();
1313        live_commits.dedup();
1314        live_worktrees.sort();
1315        live_worktrees.dedup();
1316        self.prune_to_live(&live_commits, &live_worktrees)
1317    }
1318
1319    /// Prune file rows (and their derived rows) whose `commit_sha` and `worktree_id` are both
1320    /// outside the live sets. Refuses to prune when both live sets are empty, so a missing
1321    /// live set never wipes the index. `parser_failures` are keyed by path (shared across
1322    /// commits) and are regenerated on the next index, so they are not preserved per-commit.
1323    pub fn prune_to_live(
1324        &self,
1325        live_commits: &[String],
1326        live_worktrees: &[String],
1327    ) -> anyhow::Result<GcReport> {
1328        let conn = self.storage.connection();
1329        let files_before = table_row_count(conn, "files")?;
1330        let chunks_before = table_row_count(conn, "chunks")?;
1331        if live_commits.is_empty() && live_worktrees.is_empty() {
1332            return Ok(GcReport {
1333                files_pruned: 0,
1334                chunks_pruned: 0,
1335                files_remaining: files_before,
1336                chunks_remaining: chunks_before,
1337                skipped: true,
1338            });
1339        }
1340        conn.execute_batch(
1341            "
1342            CREATE TEMP TABLE IF NOT EXISTS gc_live_commits(sha TEXT PRIMARY KEY);
1343            DELETE FROM temp.gc_live_commits;
1344            CREATE TEMP TABLE IF NOT EXISTS gc_live_worktrees(id TEXT PRIMARY KEY);
1345            DELETE FROM temp.gc_live_worktrees;
1346            CREATE TEMP TABLE IF NOT EXISTS staged_file_ids(id INTEGER PRIMARY KEY);
1347            DELETE FROM temp.staged_file_ids;
1348            ",
1349        )?;
1350        {
1351            let mut stmt =
1352                conn.prepare("INSERT OR IGNORE INTO temp.gc_live_commits(sha) VALUES (?1)")?;
1353            for sha in live_commits {
1354                stmt.execute([sha])?;
1355            }
1356        }
1357        {
1358            let mut stmt =
1359                conn.prepare("INSERT OR IGNORE INTO temp.gc_live_worktrees(id) VALUES (?1)")?;
1360            for id in live_worktrees {
1361                stmt.execute([id])?;
1362            }
1363        }
1364        // A file survives if its commit is live OR its worktree overlay is live. Empty-string
1365        // keys never appear in the live sets, so unkeyed rows are pruned.
1366        conn.execute(
1367            "
1368            INSERT OR IGNORE INTO temp.staged_file_ids(id)
1369            SELECT id FROM main.files
1370            WHERE commit_sha NOT IN (SELECT sha FROM temp.gc_live_commits)
1371              AND worktree_id NOT IN (SELECT id FROM temp.gc_live_worktrees)
1372            ",
1373            [],
1374        )?;
1375        self.delete_staged_files_cascade()?;
1376        conn.execute_batch("DELETE FROM temp.staged_file_ids;")?;
1377        let files_remaining = table_row_count(conn, "files")?;
1378        let chunks_remaining = table_row_count(conn, "chunks")?;
1379        Ok(GcReport {
1380            files_pruned: files_before.saturating_sub(files_remaining),
1381            chunks_pruned: chunks_before.saturating_sub(chunks_remaining),
1382            files_remaining,
1383            chunks_remaining,
1384            skipped: false,
1385        })
1386    }
1387
1388    pub fn current_embedding_count(&self, model_id: &str) -> anyhow::Result<u64> {
1389        ai::current_embedding_count(self.storage.connection(), model_id)
1390    }
1391
1392    pub fn heal_index(&self, limit: Option<u32>) -> anyhow::Result<HealIndexReport> {
1393        let Some(root) = self.storage.source_root() else {
1394            anyhow::bail!("heal_index requires source_root metadata; run `rag-rat index` first");
1395        };
1396        let indexed_files = self.indexed_files()?;
1397        let max_repairs = limit.map(usize::try_from).transpose()?.unwrap_or(usize::MAX);
1398        let mut report = HealIndexReport {
1399            checked_files: 0,
1400            healed_files: 0,
1401            removed_files: 0,
1402            skipped_files: 0,
1403            fts_fresh: false,
1404            message: None,
1405        };
1406
1407        for file in indexed_files {
1408            report.checked_files += 1;
1409            let path = Path::new(&file.path);
1410            let full_path = root.join(path);
1411            let Ok(text) = fs::read_to_string(&full_path) else {
1412                if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1413                    >= max_repairs
1414                {
1415                    report.message =
1416                        Some("limit reached; rerun heal_index to continue".to_string());
1417                    break;
1418                }
1419                self.mark_file_deleted(path)?;
1420                report.removed_files += 1;
1421                continue;
1422            };
1423            let sha256 = hex_sha256(text.as_bytes());
1424            if sha256 == file.sha256 {
1425                report.skipped_files += 1;
1426                continue;
1427            }
1428            if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1429                >= max_repairs
1430            {
1431                report.message = Some("limit reached; rerun heal_index to continue".to_string());
1432                break;
1433            }
1434            self.heal_file(path)?;
1435            report.healed_files += 1;
1436        }
1437
1438        if report.healed_files > 0 || report.removed_files > 0 {
1439            self.sync_fts()?;
1440        } else {
1441            self.ensure_fts_fresh()?;
1442        }
1443        report.fts_fresh = !self.fts_dirty()?;
1444        Ok(report)
1445    }
1446
1447    pub fn ffi_surface(&self, limit: u32) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1448        crate::query::impact::ffi_surface(self.storage.connection(), limit)
1449    }
1450
1451    pub fn find_callers(
1452        &self,
1453        symbol: &str,
1454        limit: u32,
1455    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1456        crate::query::graph::traverse(self.storage.connection(), symbol, true, limit)
1457    }
1458
1459    pub fn find_callers_with_options(
1460        &self,
1461        symbol: &str,
1462        limit: u32,
1463        options: &crate::query::graph::GraphTraversalOptions,
1464    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1465        let options = self.graph_options_with_logical_group(options)?;
1466        crate::query::graph::traverse_with_options(
1467            self.storage.connection(),
1468            symbol,
1469            true,
1470            limit,
1471            &options,
1472        )
1473    }
1474
1475    pub fn trace_callees(
1476        &self,
1477        symbol: &str,
1478        limit: u32,
1479    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1480        crate::query::graph::traverse(self.storage.connection(), symbol, false, limit)
1481    }
1482
1483    pub fn trace_callees_with_options(
1484        &self,
1485        symbol: &str,
1486        limit: u32,
1487        options: &crate::query::graph::GraphTraversalOptions,
1488    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1489        let options = self.graph_options_with_logical_group(options)?;
1490        crate::query::graph::traverse_with_options(
1491            self.storage.connection(),
1492            symbol,
1493            false,
1494            limit,
1495            &options,
1496        )
1497    }
1498
1499    pub fn graph_traversal_report(
1500        &self,
1501        tool: &str,
1502        symbol: &crate::query::symbol::SymbolHit,
1503        reverse: bool,
1504        limit: u32,
1505        options: &crate::query::graph::GraphTraversalOptions,
1506    ) -> anyhow::Result<crate::query::graph::GraphTraversalReport> {
1507        let options = self.graph_options_with_logical_group(options)?;
1508        let results = crate::query::graph::traverse_with_options(
1509            self.storage.connection(),
1510            &symbol.qualified_name,
1511            reverse,
1512            limit,
1513            &options,
1514        )?;
1515        let summary = crate::query::graph::traversal_summary(
1516            self.storage.connection(),
1517            &symbol.qualified_name,
1518            reverse,
1519            limit,
1520            &options,
1521            results.len(),
1522        )?;
1523        let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1524        let mut paths = BTreeSet::new();
1525        paths.insert(symbol.path.clone());
1526        for result in &results {
1527            if let Some(callsite) = &result.callsite {
1528                paths.insert(callsite.path.clone());
1529            }
1530        }
1531        let mut coverage = self.graph_coverage(paths)?;
1532        if summary.unresolved > 0 {
1533            coverage.known_index_gaps.push(format!(
1534                "{} unresolved qualified callsites match the requested final segment but are not verified to this symbol",
1535                summary.unresolved
1536            ));
1537        }
1538        Ok(crate::query::graph::GraphTraversalReport {
1539            query: crate::query::graph::GraphTraversalQuery {
1540                tool: tool.to_string(),
1541                symbol_id: Some(symbol.symbol_id),
1542                logical_symbol_id: options.logical_symbol_id,
1543                symbol_path: symbol.qualified_name.clone(),
1544                resolution: options.resolution_mode.as_str().to_string(),
1545            },
1546            logical_symbol,
1547            variants,
1548            summary,
1549            coverage,
1550            results,
1551        })
1552    }
1553
1554    pub fn compare_graph_to_text(
1555        &self,
1556        symbol: &crate::query::symbol::SymbolHit,
1557        pattern: &str,
1558        limit: u32,
1559        options: &crate::query::graph::GraphTraversalOptions,
1560        include_tests: bool,
1561    ) -> anyhow::Result<crate::query::graph::CompareGraphTextReport> {
1562        let regex = Regex::new(pattern)?;
1563        let options = self.graph_options_with_logical_group(options)?;
1564        let mut graph_edges = crate::query::graph::traverse_with_options(
1565            self.storage.connection(),
1566            &symbol.qualified_name,
1567            true,
1568            limit,
1569            &options,
1570        )?;
1571        if !include_tests {
1572            graph_edges.retain(|edge| {
1573                edge.callsite.as_ref().is_none_or(|callsite| !is_test_like_path(&callsite.path))
1574            });
1575        }
1576        let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1577        let text_hits = self.regex_hits(pattern, &regex, include_tests)?;
1578        let text_by_location = text_hits
1579            .iter()
1580            .map(|hit| ((hit.path.clone(), hit.line), hit))
1581            .collect::<BTreeMap<_, _>>();
1582        let graph_by_location = graph_edges
1583            .iter()
1584            .filter_map(|edge| {
1585                edge.callsite
1586                    .as_ref()
1587                    .map(|callsite| ((callsite.path.clone(), callsite.line), edge))
1588            })
1589            .collect::<BTreeMap<_, _>>();
1590
1591        let mut paths = BTreeSet::new();
1592        paths.insert(symbol.path.clone());
1593        for hit in &text_hits {
1594            paths.insert(hit.path.clone());
1595        }
1596        for edge in &graph_edges {
1597            if let Some(callsite) = &edge.callsite {
1598                paths.insert(callsite.path.clone());
1599            }
1600        }
1601
1602        let parser_failure_paths = self
1603            .parser_failure_paths()?
1604            .into_iter()
1605            .map(|failure| failure.path)
1606            .collect::<BTreeSet<_>>();
1607        let mut matched_hits = Vec::new();
1608        let mut text_only_hits = Vec::new();
1609        let mut likely_parser_gaps = Vec::new();
1610        for hit in &text_hits {
1611            if let Some(edge) = graph_by_location.get(&(hit.path.clone(), hit.line)) {
1612                matched_hits.push(crate::query::graph::MatchedGraphTextHit {
1613                    path: hit.path.clone(),
1614                    line: hit.line,
1615                    text: hit.text.clone(),
1616                    target: edge.target.clone(),
1617                    edge_kind: edge.edge_kind.clone(),
1618                    confidence: edge.confidence.clone(),
1619                    resolution: edge.resolution.clone(),
1620                });
1621            } else {
1622                let gap_kind = classify_text_only_hit(&hit.path, &hit.text, &parser_failure_paths);
1623                let text_only_hit = crate::query::graph::TextOnlyHit {
1624                    path: hit.path.clone(),
1625                    line: hit.line,
1626                    text: hit.text.clone(),
1627                    reason: if gap_kind == "parser_call_extraction" || gap_kind == "parser_failure"
1628                    {
1629                        "no graph edge extracted"
1630                    } else {
1631                        "text mention outside graph-call evidence"
1632                    }
1633                    .to_string(),
1634                    likely_gap: gap_kind.to_string(),
1635                };
1636                if is_likely_parser_gap_kind(gap_kind) {
1637                    likely_parser_gaps.push(text_only_hit.clone());
1638                }
1639                text_only_hits.push(text_only_hit);
1640            }
1641        }
1642
1643        let mut graph_only_edges = Vec::new();
1644        let mut likely_false_positives = Vec::new();
1645        for edge in &graph_edges {
1646            let Some(callsite) = &edge.callsite else {
1647                continue;
1648            };
1649            if text_by_location.contains_key(&(callsite.path.clone(), callsite.line)) {
1650                continue;
1651            }
1652            let current_line = self.current_line_text(&callsite.path, callsite.line)?;
1653            let graph_only = crate::query::graph::GraphOnlyEdge {
1654                path: callsite.path.clone(),
1655                line: callsite.line,
1656                target: edge.target.clone(),
1657                edge_kind: edge.edge_kind.clone(),
1658                confidence: edge.confidence.clone(),
1659                resolution: edge.resolution.clone(),
1660                evidence: edge.evidence.clone(),
1661                reason: "graph edge exists but pattern did not match text".to_string(),
1662                likely_reason: graph_only_reason(edge, current_line.as_deref()),
1663            };
1664            if is_likely_false_positive_graph_only(edge, &graph_only) {
1665                likely_false_positives.push(graph_only.clone());
1666            }
1667            graph_only_edges.push(graph_only);
1668        }
1669        let complete = likely_parser_gaps.is_empty() && likely_false_positives.is_empty();
1670        let recommended_fallback =
1671            recommended_graph_text_fallback(&likely_parser_gaps, &graph_only_edges);
1672        let pattern_match_mode = compare_pattern_match_mode(pattern, &symbol.name);
1673        let mut warnings = Vec::new();
1674        if pattern_match_mode == "substring_identifier" {
1675            warnings.push(format!(
1676                "pattern may match identifiers that merely contain `{}`; use an identifier boundary or escaped call suffix for exact text auditing",
1677                symbol.name
1678            ));
1679        }
1680
1681        Ok(crate::query::graph::CompareGraphTextReport {
1682            query: crate::query::graph::CompareGraphTextQuery {
1683                symbol_id: Some(symbol.symbol_id),
1684                logical_symbol_id: options.logical_symbol_id,
1685                symbol_path: symbol.qualified_name.clone(),
1686                pattern: pattern.to_string(),
1687                resolution: options.resolution_mode.as_str().to_string(),
1688                include_tests,
1689            },
1690            logical_symbol,
1691            variants,
1692            summary: crate::query::graph::CompareGraphTextSummary {
1693                graph_hits: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1694                graph_edges: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1695                text_hits: u64::try_from(text_hits.len()).unwrap_or(u64::MAX),
1696                matched: u64::try_from(matched_hits.len()).unwrap_or(u64::MAX),
1697                graph_only: u64::try_from(graph_only_edges.len()).unwrap_or(u64::MAX),
1698                text_only: u64::try_from(text_only_hits.len()).unwrap_or(u64::MAX),
1699                text_mentions: u64::try_from(text_only_hits.len() - likely_parser_gaps.len())
1700                    .unwrap_or(u64::MAX),
1701                likely_parser_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1702                likely_false_positives: u64::try_from(likely_false_positives.len())
1703                    .unwrap_or(u64::MAX),
1704                likely_index_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1705                complete,
1706                recommended_fallback,
1707                pattern_match_mode,
1708                warnings,
1709            },
1710            coverage: self.graph_coverage(paths)?,
1711            matched_hits,
1712            text_only_hits,
1713            graph_only_edges,
1714            likely_parser_gaps,
1715            likely_false_positives,
1716        })
1717    }
1718
1719    fn graph_logical_symbol(
1720        &self,
1721        logical_symbol_id: Option<i64>,
1722    ) -> anyhow::Result<(
1723        Option<crate::query::graph::LogicalSymbol>,
1724        Vec<crate::query::graph::LogicalSymbolVariant>,
1725    )> {
1726        let Some(logical_symbol_id) = logical_symbol_id else {
1727            return Ok((None, Vec::new()));
1728        };
1729        let Some(logical) = crate::query::symbol::lookup_logical_by_id(
1730            self.storage.connection(),
1731            logical_symbol_id,
1732        )?
1733        else {
1734            return Ok((None, Vec::new()));
1735        };
1736        let variants = crate::query::symbol::logical_members(
1737            self.storage.connection(),
1738            logical.logical_symbol_id,
1739        )?
1740        .into_iter()
1741        .map(|member| crate::query::graph::LogicalSymbolVariant {
1742            symbol_id: member.symbol_id,
1743            cfg_expr: member.cfg_expr,
1744            signature_hash: member.signature_hash,
1745            start_line: member.start_line,
1746            end_line: member.end_line,
1747        })
1748        .collect::<Vec<_>>();
1749        Ok((
1750            Some(crate::query::graph::LogicalSymbol {
1751                logical_symbol_id: logical.logical_symbol_id,
1752                qualified_name: logical.qualified_name,
1753                variant_count: logical.variant_count,
1754                group_reason: logical.group_reason,
1755            }),
1756            variants,
1757        ))
1758    }
1759
1760    fn graph_options_with_logical_group(
1761        &self,
1762        options: &crate::query::graph::GraphTraversalOptions,
1763    ) -> anyhow::Result<crate::query::graph::GraphTraversalOptions> {
1764        if options.logical_symbol_id.is_some() {
1765            return Ok(options.clone());
1766        }
1767        let Some(symbol_id) = options.symbol_id else {
1768            return Ok(options.clone());
1769        };
1770        let Some(logical) =
1771            crate::query::symbol::logical_for_symbol_id(self.storage.connection(), symbol_id)?
1772        else {
1773            return Ok(options.clone());
1774        };
1775        let mut options = options.clone();
1776        options.logical_symbol_id = Some(logical.logical_symbol_id);
1777        Ok(options)
1778    }
1779
1780    fn local_symbol_context_hits(
1781        &self,
1782        symbol: &crate::query::symbol::SymbolHit,
1783        limit: u32,
1784    ) -> anyhow::Result<Vec<SearchHit>> {
1785        let mut stmt = self.storage.connection().prepare(
1786            "
1787            SELECT chunks.id, files.path, files.language, files.kind,
1788                   chunks.start_line, chunks.end_line, chunks.symbol_path, chunks.text
1789            FROM chunks
1790            JOIN files ON files.id = chunks.file_id
1791            WHERE files.path = ?1
1792              AND (
1793                chunks.symbol_path = ?2
1794                OR chunks.symbol_path LIKE ?3
1795                OR chunks.text LIKE ?4
1796              )
1797            ORDER BY
1798              CASE
1799                WHEN chunks.symbol_path = ?2 THEN 0
1800                WHEN chunks.symbol_path LIKE ?3 THEN 1
1801                ELSE 2
1802              END,
1803              chunks.start_line
1804            LIMIT ?5
1805            ",
1806        )?;
1807        let rows = stmt.query_map(
1808            params![
1809                symbol.path,
1810                symbol.qualified_name,
1811                format!("%{}%", symbol.name),
1812                format!("%{}%", symbol.name),
1813                i64::from(limit.max(1)),
1814            ],
1815            |row| {
1816                let text: String = row.get(7)?;
1817                Ok(SearchHit {
1818                    chunk_id: row.get(0)?,
1819                    path: row.get(1)?,
1820                    language: row.get(2)?,
1821                    kind: row.get(3)?,
1822                    start_line: row.get(4)?,
1823                    end_line: row.get(5)?,
1824                    symbol_path: row.get(6)?,
1825                    score: 1.0,
1826                    summary: bounded_summary(&text),
1827                    graph: None,
1828                    score_components: None,
1829                })
1830            },
1831        )?;
1832        let mut hits = Vec::new();
1833        for row in rows {
1834            hits.push(row?);
1835        }
1836        Ok(hits)
1837    }
1838
1839    pub fn impact_surface(
1840        &self,
1841        query: &str,
1842        limit: u32,
1843    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1844        crate::query::impact::impact_surface(self.storage.connection(), query, limit)
1845    }
1846
1847    pub fn impact_surface_with_options(
1848        &self,
1849        query: &str,
1850        limit: u32,
1851        resolution_mode: crate::query::graph::GraphResolutionMode,
1852    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1853        crate::query::impact::impact_surface_with_options(
1854            self.storage.connection(),
1855            query,
1856            limit,
1857            resolution_mode,
1858        )
1859    }
1860
1861    pub fn impact_surface_for_selected_symbol(
1862        &self,
1863        symbol: &crate::query::symbol::SymbolHit,
1864        limit: u32,
1865        resolution_mode: crate::query::graph::GraphResolutionMode,
1866    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1867        crate::query::impact::impact_surface_for_symbol(
1868            self.storage.connection(),
1869            symbol,
1870            limit,
1871            resolution_mode,
1872        )
1873    }
1874
1875    pub fn impact_surface_report_for_selected_symbol(
1876        &self,
1877        symbol: &crate::query::symbol::SymbolHit,
1878        limit: u32,
1879        options: &crate::query::impact::ImpactSurfaceOptions,
1880    ) -> anyhow::Result<crate::query::impact::ImpactSurfaceReport> {
1881        crate::query::impact::impact_surface_report_for_symbol(
1882            self.storage.connection(),
1883            symbol,
1884            limit,
1885            options,
1886        )
1887    }
1888
1889    pub fn repo_brief(
1890        &self,
1891        options: crate::query::repo_brief::RepoBriefOptions,
1892    ) -> anyhow::Result<crate::query::repo_brief::RepoBrief> {
1893        crate::query::repo_brief::repo_brief(self.storage.connection(), options)
1894    }
1895
1896    pub fn repo_clusters(
1897        &self,
1898        options: crate::query::clusters::RepoClustersOptions,
1899    ) -> anyhow::Result<crate::query::clusters::RepoClustersReport> {
1900        crate::query::clusters::repo_clusters(self.storage.connection(), options)
1901    }
1902
1903    pub fn memory_create(
1904        &self,
1905        request: crate::query::memory::RepoMemoryCreate,
1906    ) -> anyhow::Result<crate::query::memory::RepoMemoryCreateResult> {
1907        crate::query::memory::create_memory(self.storage.connection(), request)
1908    }
1909
1910    pub fn memory_update(
1911        &self,
1912        update: crate::query::memory::RepoMemoryUpdate,
1913    ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1914        crate::query::memory::update_memory(self.storage.connection(), update)
1915    }
1916
1917    pub fn memory_mark_obsolete(
1918        &self,
1919        memory_id: &str,
1920    ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1921        crate::query::memory::mark_obsolete(self.storage.connection(), memory_id)
1922    }
1923
1924    pub fn memory_search(
1925        &self,
1926        query: &str,
1927        limit: u32,
1928    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1929        crate::query::memory::memory_search(self.storage.connection(), query, limit)
1930    }
1931
1932    pub fn memory_for_symbol(
1933        &self,
1934        symbol: &crate::query::symbol::SymbolHit,
1935        limit: u32,
1936    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1937        crate::query::memory::memories_for_symbol(self.storage.connection(), symbol, limit)
1938    }
1939
1940    pub fn memory_for_path(
1941        &self,
1942        path: &str,
1943        limit: u32,
1944    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1945        crate::query::memory::memories_for_path(self.storage.connection(), path, limit)
1946    }
1947
1948    pub fn memory_for_edges(
1949        &self,
1950        edge_ids: &[i64],
1951        limit: u32,
1952    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1953        crate::query::memory::memories_for_edges(self.storage.connection(), edge_ids, limit)
1954    }
1955
1956    pub fn memory_evidence_for_symbol_and_edges(
1957        &self,
1958        symbol: &crate::query::symbol::SymbolHit,
1959        edge_ids: &[i64],
1960        limit: u32,
1961    ) -> anyhow::Result<crate::query::memory::RepoMemoryEvidence> {
1962        crate::query::memory::memory_evidence_for_symbol_and_edges(
1963            self.storage.connection(),
1964            symbol,
1965            edge_ids,
1966            limit,
1967        )
1968    }
1969
1970    pub fn memory_for_call_path_hash(
1971        &self,
1972        edge_sequence_hash: &str,
1973        limit: u32,
1974    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1975        crate::query::memory::memories_for_call_path_hash(
1976            self.storage.connection(),
1977            edge_sequence_hash,
1978            limit,
1979        )
1980    }
1981
1982    pub fn memory_validate(
1983        &self,
1984    ) -> anyhow::Result<crate::query::memory::RepoMemoryValidationReport> {
1985        crate::query::memory::validate_memories(self.storage.connection())
1986    }
1987
1988    pub fn rebuild_fts(&self) -> anyhow::Result<()> {
1989        schema::rebuild_fts(self.storage.connection())?;
1990        self.record_content_revision()?;
1991        self.record_fts_current()?;
1992        self.set_meta("fts_dirty", "false")?;
1993        Ok(())
1994    }
1995
1996    pub fn sync_fts(&self) -> anyhow::Result<()> {
1997        self.record_content_revision()?;
1998        self.record_fts_current()?;
1999        self.set_meta("fts_dirty", "false")?;
2000        Ok(())
2001    }
2002
2003    fn record_fts_current(&self) -> anyhow::Result<()> {
2004        self.set_meta("fts_synced_at_ms", &now_ms().to_string())?;
2005        let revision = self.content_revision()?;
2006        self.set_meta("fts_source_revision", &revision)?;
2007        Ok(())
2008    }
2009
2010    fn record_content_revision(&self) -> anyhow::Result<String> {
2011        let revision = self.content_revision()?;
2012        self.set_meta("content_revision", &revision)?;
2013        Ok(revision)
2014    }
2015
2016    pub fn heal_file(&self, path: &Path) -> anyhow::Result<()> {
2017        let Some(root) = self.storage.source_root() else {
2018            anyhow::bail!("index has no source_root metadata; rebuild required");
2019        };
2020        let row = self.file_row(path)?;
2021        let full_path = root.join(path);
2022        let text = fs::read_to_string(&full_path)?;
2023
2024        let changes = git_changed_paths(root).unwrap_or_default();
2025        let is_dirty = changes.changed.contains(path);
2026        let has_base_commit = !self.active_commit_sha.is_empty();
2027        let scope = if !has_base_commit || is_dirty {
2028            FileScope::worktree(self.active_worktree_id.clone())
2029        } else {
2030            FileScope::commit(self.active_commit_sha.clone())
2031        };
2032        self.remove_file_in_scope(path, &scope.commit_sha, &scope.worktree_id)?;
2033
2034        self.index_file(
2035            path,
2036            row.language,
2037            row.kind,
2038            file_metadata_ms(&full_path)?,
2039            &text,
2040            &scope,
2041        )?;
2042        self.rebuild_logical_symbols()?;
2043        self.resolve_edges()
2044    }
2045
2046    fn index_file(
2047        &self,
2048        path: &Path,
2049        language: Language,
2050        kind: TargetKind,
2051        modified_at_ms: i64,
2052        text: &str,
2053        scope: &FileScope,
2054    ) -> anyhow::Result<()> {
2055        if language != Language::Markdown && kind != TargetKind::Generated {
2056            if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
2057                // Large source files are intentionally coarse-indexed to keep full-repo indexing
2058                // responsive. This is not a parser failure.
2059            } else if let Some(message) = parser::parse_error(path, language, text)
2060                .unwrap_or_else(|err| Some(err.to_string()))
2061            {
2062                self.insert_parser_failure(path, language, &message)?;
2063            }
2064        }
2065        let sha256 = hex_sha256(text.as_bytes());
2066        let file_id = self.storage.connection().query_row(
2067            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2068             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
2069             RETURNING id",
2070            params![
2071                path_string(path),
2072                language.as_str(),
2073                kind.as_str(),
2074                sha256,
2075                modified_at_ms,
2076                matches!(kind, TargetKind::Generated),
2077                now_ms(),
2078                sha256,
2079                &scope.commit_sha,
2080                &scope.worktree_id,
2081            ],
2082            |row| row.get::<_, i64>(0),
2083        )?;
2084        let chunks = if kind == TargetKind::Generated {
2085            chunker::generated_chunks_for_file(path, text)
2086        } else {
2087            chunker::chunks_for_file(path, language, text)
2088        };
2089        let symbols =
2090            if kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
2091                Vec::new()
2092            } else {
2093                symbols::symbols_for_file(path, language, text)
2094            };
2095        self.insert_chunks(file_id, &sha256, &chunks, text)?;
2096        self.insert_symbols(file_id, language, &symbols)?;
2097        if kind != TargetKind::Generated && text.len() <= edges::MAX_GRAPH_PARSE_BYTES {
2098            edges::index_file_edges(self.storage.connection(), file_id, path, language, text)?;
2099        }
2100        self.mark_fts_dirty()?;
2101        Ok(())
2102    }
2103
2104    fn insert_prepared_file(&self, prepared_file: &PreparedIndexFile) -> anyhow::Result<()> {
2105        let file = &prepared_file.file;
2106        let prepared = match &prepared_file.prepared {
2107            Ok(prepared) => prepared,
2108            Err(err) => {
2109                self.insert_parser_failure(&file.relative_path, file.language, &err.to_string())?;
2110                return Ok(());
2111            },
2112        };
2113        if let Some(message) = &prepared.parser_failure {
2114            self.insert_parser_failure(&file.relative_path, file.language, message)?;
2115        }
2116        let file_id = self.storage.connection().query_row(
2117            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2118             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
2119             RETURNING id",
2120            params![
2121                path_string(&file.relative_path),
2122                file.language.as_str(),
2123                file.kind.as_str(),
2124                prepared.sha256,
2125                prepared.modified_at_ms,
2126                matches!(file.kind, TargetKind::Generated),
2127                now_ms(),
2128                prepared.sha256,
2129                file.commit_sha,
2130                file.worktree_id,
2131            ],
2132            |row| row.get::<_, i64>(0),
2133        )?;
2134        self.insert_chunks(file_id, &prepared.sha256, &prepared.chunks, &prepared.text)?;
2135        self.insert_symbols(file_id, file.language, &prepared.symbols)?;
2136        if file.kind != TargetKind::Generated && prepared.text.len() <= edges::MAX_GRAPH_PARSE_BYTES
2137        {
2138            edges::index_file_edges(
2139                self.storage.connection(),
2140                file_id,
2141                &file.relative_path,
2142                file.language,
2143                &prepared.text,
2144            )?;
2145        }
2146        self.mark_fts_dirty()?;
2147        Ok(())
2148    }
2149
2150    fn insert_chunks(
2151        &self,
2152        file_id: i64,
2153        source_revision: &str,
2154        chunks: &[Chunk],
2155        full_text: &str,
2156    ) -> anyhow::Result<()> {
2157        let (path, language, kind) = self.storage.connection().query_row(
2158            "SELECT path, language, kind FROM main.files WHERE id = ?1",
2159            [file_id],
2160            |row| {
2161                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?))
2162            },
2163        )?;
2164        for chunk in chunks {
2165            let anchor =
2166                anchors::anchor_for_text(&chunk.text, chunk.start_line, chunk.end_line, full_text);
2167            let embedding_policy = ai::embedding_policy_for_chunk(
2168                Path::new(&path),
2169                &language,
2170                &kind,
2171                chunk.kind,
2172                chunk.symbol_path.as_deref(),
2173                &chunk.text,
2174                ai::DEFAULT_MAX_EMBEDDING_CHARS,
2175            );
2176            self.storage.connection().execute(
2177                "INSERT INTO chunks(file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line, text, text_hash,
2178                                    source_revision, anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2179                                    start_context_hash, end_context_hash, context_radius, embedding_policy, embedding_priority)
2180                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19)",
2181                params![
2182                    file_id,
2183                    chunk.kind,
2184                    chunk.symbol_path,
2185                    i64::try_from(chunk.start_byte)?,
2186                    i64::try_from(chunk.end_byte)?,
2187                    i64::try_from(chunk.start_line)?,
2188                    i64::try_from(chunk.end_line)?,
2189                    chunk.text,
2190                    hex_sha256(chunk.text.as_bytes()),
2191                    source_revision,
2192                    anchor.version,
2193                    anchor.normalized_hash,
2194                    anchor.start_boundary_hash,
2195                    anchor.end_boundary_hash,
2196                    anchor.start_context_hash,
2197                    anchor.end_context_hash,
2198                    anchor.context_radius,
2199                    embedding_policy.policy,
2200                    embedding_policy.priority,
2201                ],
2202            )?;
2203            let chunk_id = self.storage.connection().last_insert_rowid();
2204            self.storage.connection().execute(
2205                "INSERT INTO chunk_fts(rowid, text) VALUES (?1, ?2)",
2206                params![chunk_id, chunk.text],
2207            )?;
2208        }
2209        Ok(())
2210    }
2211
2212    fn insert_symbols(
2213        &self,
2214        file_id: i64,
2215        language: Language,
2216        symbols: &[Symbol],
2217    ) -> anyhow::Result<()> {
2218        for symbol in symbols {
2219            self.storage.connection().execute(
2220                "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs)
2221                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
2222                params![
2223                    file_id,
2224                    language.as_str(),
2225                    symbol.name,
2226                    symbol.qualified_name,
2227                    symbol.kind,
2228                    i64::try_from(symbol.start_byte)?,
2229                    i64::try_from(symbol.end_byte)?,
2230                    symbol.signature,
2231                    symbol.docs,
2232                ],
2233            )?;
2234            let symbol_id = self.storage.connection().last_insert_rowid();
2235            for fact in &symbol.facts {
2236                self.storage.connection().execute(
2237                    "INSERT OR IGNORE INTO symbol_facts(symbol_id, fact_kind, fact_value)
2238                     VALUES (?1, ?2, ?3)",
2239                    params![symbol_id, fact.kind, fact.value],
2240                )?;
2241            }
2242        }
2243        Ok(())
2244    }
2245
2246    fn write_git_meta(&self, root: &Path) -> anyhow::Result<()> {
2247        self.set_meta("git_commit", &git_output(root, &["rev-parse", "HEAD"]).unwrap_or_default())?;
2248        let dirty = !git_output(root, &["status", "--porcelain"]).unwrap_or_default().is_empty();
2249        self.set_meta("git_dirty", if dirty { "true" } else { "false" })?;
2250        Ok(())
2251    }
2252
2253    fn apply_prepared_git_history(
2254        &self,
2255        root: &Path,
2256        handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
2257    ) -> anyhow::Result<GitHistoryIndexStatus> {
2258        let prepared = join_git_history_prepare(handle)?;
2259        git_history::apply_prepared(self.storage.connection(), root, prepared)
2260    }
2261
2262    fn git_history_status(&self) -> anyhow::Result<GitHistoryIndexStatus> {
2263        let Some(root) = self.storage.source_root() else {
2264            return git_history::status(self.storage.connection(), Path::new("."));
2265        };
2266        git_history::status(self.storage.connection(), root)
2267    }
2268
2269    fn github_status(&self) -> anyhow::Result<GitHubStatus> {
2270        github::status(self.storage.connection())
2271    }
2272
2273    fn mark_fts_dirty(&self) -> anyhow::Result<()> {
2274        self.set_meta("fts_dirty", "true")
2275    }
2276
2277    fn resolve_edges(&self) -> anyhow::Result<()> {
2278        edges::resolve_all_edges(self.storage.connection())
2279    }
2280
2281    fn rebuild_logical_symbols(&self) -> anyhow::Result<()> {
2282        // The insert below re-derives the COMPLETE logical-symbol table from all current symbols,
2283        // so clear it entirely first. A member-join "rebuild set" misses logical_symbols whose
2284        // members were cascade-deleted with their symbols (clear_full_rebuild_tables deletes
2285        // files → symbols → logical_symbol_members via FK, but logical_symbols has no such FK).
2286        // Those orphans would then collide with the deterministic stable id on re-insert.
2287        self.storage.connection().execute_batch(
2288            "
2289            DELETE FROM main.logical_symbol_members;
2290            DELETE FROM main.logical_symbols;
2291            ",
2292        )?;
2293
2294        let mut stmt = self.storage.connection().prepare(
2295            "
2296            SELECT symbols.id, symbols.file_id, files.path, symbols.language, symbols.name,
2297                   symbols.qualified_name, symbols.kind, symbols.start_byte, symbols.end_byte,
2298                   symbols.signature,
2299                   COALESCE((
2300                     SELECT chunks.start_byte
2301                     FROM chunks
2302                     WHERE chunks.file_id = symbols.file_id
2303                       AND symbols.start_byte >= chunks.start_byte
2304                       AND symbols.start_byte < chunks.end_byte
2305                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2306                     LIMIT 1
2307                   ), symbols.start_byte) AS chunk_start_byte,
2308                   COALESCE((
2309                     SELECT chunks.start_line
2310                     FROM chunks
2311                     WHERE chunks.file_id = symbols.file_id
2312                       AND symbols.start_byte >= chunks.start_byte
2313                       AND symbols.start_byte < chunks.end_byte
2314                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2315                     LIMIT 1
2316                   ), 1) AS chunk_start_line,
2317                   COALESCE((
2318                     SELECT chunks.text
2319                     FROM chunks
2320                     WHERE chunks.file_id = symbols.file_id
2321                       AND symbols.start_byte >= chunks.start_byte
2322                       AND symbols.start_byte < chunks.end_byte
2323                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2324                     LIMIT 1
2325                   ), '') AS chunk_text
2326            FROM symbols
2327            JOIN files ON files.id = symbols.file_id
2328            ORDER BY files.path, symbols.language, symbols.qualified_name, symbols.kind,
2329                     symbols.start_byte, symbols.end_byte
2330            ",
2331        )?;
2332        let rows = stmt.query_map([], |row| {
2333            let start_byte = usize::try_from(row.get::<_, i64>(7)?).unwrap_or(0);
2334            let end_byte = usize::try_from(row.get::<_, i64>(8)?).unwrap_or(0);
2335            let chunk_start_byte = usize::try_from(row.get::<_, i64>(10)?).unwrap_or(start_byte);
2336            let chunk_start_line = row.get::<_, i64>(11)?;
2337            let chunk_text: String = row.get(12)?;
2338            let start_line =
2339                symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, start_byte);
2340            let end_line =
2341                symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, end_byte);
2342            Ok(LogicalSymbolMemberRow {
2343                symbol_id: row.get(0)?,
2344                path: row.get(2)?,
2345                language: row.get(3)?,
2346                name: row.get(4)?,
2347                qualified_name: row.get(5)?,
2348                kind: row.get(6)?,
2349                signature: row.get(9)?,
2350                start_line,
2351                end_line,
2352            })
2353        })?;
2354        let mut groups: BTreeMap<LogicalSymbolKey, Vec<LogicalSymbolMemberRow>> = BTreeMap::new();
2355        for row in rows {
2356            let row = row?;
2357            groups.entry(LogicalSymbolKey::from(&row)).or_default().push(row);
2358        }
2359        for (key, members) in groups {
2360            let group_reason = if members.len() > 1 { "cfg_variant" } else { "single" };
2361            let logical_symbol_id = key.stable_id();
2362            self.storage.connection().execute(
2363                "
2364                INSERT INTO logical_symbols(id, language, path, logical_name, qualified_name, kind, variant_count, group_reason)
2365                VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
2366                ",
2367                params![
2368                    logical_symbol_id,
2369                    key.language,
2370                    key.path,
2371                    key.name,
2372                    key.qualified_name,
2373                    key.kind,
2374                    i64::try_from(members.len()).unwrap_or(i64::MAX),
2375                    group_reason,
2376                ],
2377            )?;
2378            for member in members {
2379                let signature_hash =
2380                    member.signature.as_deref().map(|signature| hex_sha256(signature.as_bytes()));
2381                self.storage.connection().execute(
2382                    "
2383                    INSERT INTO logical_symbol_members(
2384                        logical_symbol_id, symbol_id, cfg_expr, signature_hash, start_line, end_line
2385                    )
2386                    VALUES (?1, ?2, NULL, ?3, ?4, ?5)
2387                    ",
2388                    params![
2389                        logical_symbol_id,
2390                        member.symbol_id,
2391                        signature_hash,
2392                        member.start_line,
2393                        member.end_line,
2394                    ],
2395                )?;
2396            }
2397        }
2398        Ok(())
2399    }
2400
2401    fn graph_coverage(
2402        &self,
2403        paths: BTreeSet<String>,
2404    ) -> anyhow::Result<crate::query::graph::GraphCoverage> {
2405        let indexed_files =
2406            self.storage
2407                .connection()
2408                .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2409        let parser_failure_paths = self.parser_failure_paths()?;
2410        let parser_failures = u64::try_from(parser_failure_paths.len()).unwrap_or(0);
2411        let known_index_gaps = parser_failure_paths
2412            .iter()
2413            .map(|failure| {
2414                format!(
2415                    "{} parser failed for {}: {}",
2416                    failure.language, failure.path, failure.message
2417                )
2418            })
2419            .collect::<Vec<_>>();
2420        let mut stale_files = 0_u64;
2421        let mut parser_coverage_for_paths = Vec::new();
2422        for path in paths {
2423            let Some(row) = self.graph_path_row(&path)? else {
2424                parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2425                    path,
2426                    language: "unknown".to_string(),
2427                    parser_status: "missing_from_index".to_string(),
2428                    graph_status: "missing_from_index".to_string(),
2429                    last_indexed_revision: None,
2430                });
2431                continue;
2432            };
2433            let stale = self.source_path_is_stale(&path, &row.sha256);
2434            if stale {
2435                stale_files += 1;
2436            }
2437            let parser_failed = parser_failure_paths.iter().any(|failure| failure.path == path);
2438            parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2439                path,
2440                language: row.language,
2441                parser_status: if parser_failed { "failed" } else { "ok" }.to_string(),
2442                graph_status: if stale {
2443                    "stale_source"
2444                } else if parser_failed {
2445                    "parser_failed"
2446                } else {
2447                    "ok"
2448                }
2449                .to_string(),
2450                last_indexed_revision: (!row.indexed_revision.is_empty())
2451                    .then_some(row.indexed_revision),
2452            });
2453        }
2454        Ok(crate::query::graph::GraphCoverage {
2455            indexed_files: u64::try_from(indexed_files).unwrap_or(0),
2456            parser_failures,
2457            stale_files,
2458            known_index_gaps,
2459            parser_coverage_for_paths,
2460        })
2461    }
2462
2463    fn graph_path_row(&self, path: &str) -> anyhow::Result<Option<GraphPathRow>> {
2464        self.storage
2465            .connection()
2466            .query_row(
2467                "SELECT language, sha256, indexed_revision FROM files WHERE path = ?1",
2468                [path],
2469                |row| {
2470                    Ok(GraphPathRow {
2471                        language: row.get(0)?,
2472                        sha256: row.get(1)?,
2473                        indexed_revision: row.get(2)?,
2474                    })
2475                },
2476            )
2477            .optional()
2478            .map_err(Into::into)
2479    }
2480
2481    fn source_path_is_stale(&self, path: &str, indexed_sha256: &str) -> bool {
2482        let Some(root) = self.storage.source_root() else {
2483            return false;
2484        };
2485        let Ok(bytes) = fs::read(root.join(path)) else {
2486            return true;
2487        };
2488        hex_sha256(&bytes) != indexed_sha256
2489    }
2490
2491    fn regex_hits(
2492        &self,
2493        pattern: &str,
2494        regex: &Regex,
2495        include_tests: bool,
2496    ) -> anyhow::Result<Vec<crate::query::graph::TextOnlyHit>> {
2497        let Some(root) = self.storage.source_root() else {
2498            anyhow::bail!("cannot compare graph to text: source_root is missing from index_meta");
2499        };
2500        let mut stmt = self.storage.connection().prepare("SELECT path FROM files ORDER BY path")?;
2501        let paths =
2502            stmt.query_map([], |row| row.get::<_, String>(0))?.collect::<Result<Vec<_>, _>>()?;
2503        let mut hits = Vec::new();
2504        for path in paths {
2505            if !include_tests && is_test_like_path(&path) {
2506                continue;
2507            }
2508            let full_path = root.join(&path);
2509            let Ok(text) = fs::read_to_string(&full_path) else {
2510                continue;
2511            };
2512            for (index, line) in text.lines().enumerate() {
2513                if regex.is_match(line) {
2514                    hits.push(crate::query::graph::TextOnlyHit {
2515                        path: path.clone(),
2516                        line: i64::try_from(index + 1).unwrap_or(i64::MAX),
2517                        text: line.trim().to_string(),
2518                        reason: "text pattern matched".to_string(),
2519                        likely_gap: pattern.to_string(),
2520                    });
2521                }
2522            }
2523        }
2524        Ok(hits)
2525    }
2526
2527    fn current_line_text(&self, path: &str, line: i64) -> anyhow::Result<Option<String>> {
2528        let Some(root) = self.storage.source_root() else {
2529            return Ok(None);
2530        };
2531        let Ok(text) = fs::read_to_string(root.join(path)) else {
2532            return Ok(None);
2533        };
2534        let Some(index) = usize::try_from(line.saturating_sub(1)).ok() else {
2535            return Ok(None);
2536        };
2537        Ok(text.lines().nth(index).map(|line| line.trim().to_string()))
2538    }
2539
2540    fn ensure_graph_index_current(&self) -> anyhow::Result<()> {
2541        if self.meta("graph_index_version")?.as_deref() == Some(GRAPH_INDEX_VERSION) {
2542            return Ok(());
2543        }
2544        let Some(root) = self.storage.source_root().map(Path::to_path_buf) else {
2545            return Ok(());
2546        };
2547        self.storage.execute_batch("BEGIN IMMEDIATE TRANSACTION")?;
2548        let result = (|| -> anyhow::Result<()> {
2549            self.storage.connection().execute("DELETE FROM edges", [])?;
2550            let files = self.graph_reindex_files()?;
2551            for file in files {
2552                if file.kind == TargetKind::Generated || file.language == Language::Markdown {
2553                    continue;
2554                }
2555                let full_path = root.join(&file.path);
2556                let Ok(text) = fs::read_to_string(full_path) else {
2557                    continue;
2558                };
2559                if text.len() > edges::MAX_GRAPH_PARSE_BYTES {
2560                    continue;
2561                }
2562                edges::index_file_edges(
2563                    self.storage.connection(),
2564                    file.id,
2565                    Path::new(&file.path),
2566                    file.language,
2567                    &text,
2568                )?;
2569            }
2570            self.resolve_edges()?;
2571            self.mark_graph_index_current()?;
2572            Ok(())
2573        })();
2574        if result.is_err() {
2575            let _ = self.storage.execute_batch("ROLLBACK");
2576        }
2577        result?;
2578        self.storage.execute_batch("COMMIT")?;
2579        Ok(())
2580    }
2581
2582    fn mark_graph_index_current(&self) -> anyhow::Result<()> {
2583        self.set_meta("graph_index_version", GRAPH_INDEX_VERSION)
2584    }
2585
2586    fn set_meta(&self, key: &str, value: &str) -> anyhow::Result<()> {
2587        self.storage.connection().execute(
2588            "INSERT INTO index_meta(key, value) VALUES (?1, ?2)
2589             ON CONFLICT(key) DO UPDATE SET value = excluded.value",
2590            params![key, value],
2591        )?;
2592        Ok(())
2593    }
2594
2595    fn meta(&self, key: &str) -> anyhow::Result<Option<String>> {
2596        meta_for(self.storage.connection(), key)
2597    }
2598
2599    fn insert_parser_failure(
2600        &self,
2601        path: &Path,
2602        language: Language,
2603        message: &str,
2604    ) -> anyhow::Result<()> {
2605        self.storage.connection().execute(
2606            "INSERT INTO parser_failures(path, language, message) VALUES (?1, ?2, ?3)",
2607            params![path_string(path), language.as_str(), message],
2608        )?;
2609        Ok(())
2610    }
2611
2612    fn parser_failure_count(&self) -> anyhow::Result<u64> {
2613        let count = self.storage.connection().query_row(
2614            "SELECT COUNT(*) FROM parser_failures",
2615            [],
2616            |row| row.get::<_, i64>(0),
2617        )?;
2618        Ok(u64::try_from(count).unwrap_or(0))
2619    }
2620
2621    fn parser_failure_paths(&self) -> anyhow::Result<Vec<ParserFailure>> {
2622        let mut stmt = self.storage.connection().prepare(
2623            "SELECT path, language, message FROM parser_failures ORDER BY path, language, message",
2624        )?;
2625        let rows = stmt.query_map([], |row| {
2626            Ok(ParserFailure { path: row.get(0)?, language: row.get(1)?, message: row.get(2)? })
2627        })?;
2628        let mut failures = Vec::new();
2629        for row in rows {
2630            failures.push(row?);
2631        }
2632        Ok(failures)
2633    }
2634
2635    fn search_with_heal(
2636        &self,
2637        query: &str,
2638        limit: u32,
2639        include_generated: bool,
2640        allow_heal: bool,
2641        explain: bool,
2642        options: SearchOptions,
2643    ) -> anyhow::Result<Vec<SearchHit>> {
2644        let hits = crate::search::lexical::search_with_options(
2645            self.storage.connection(),
2646            query,
2647            limit,
2648            include_generated,
2649            explain,
2650            options,
2651        )?;
2652        if !allow_heal {
2653            return Ok(hits);
2654        }
2655        let stale = self.stale_hit_paths(&hits)?;
2656        if stale.is_empty() {
2657            return Ok(hits);
2658        }
2659        if stale.len() > MAX_AUTO_HEAL_FILES_PER_CALL {
2660            anyhow::bail!(IndexError::NeedsReindex {
2661                stale_files: stale.len(),
2662                cap: MAX_AUTO_HEAL_FILES_PER_CALL,
2663            });
2664        }
2665        for path in stale {
2666            self.heal_file(Path::new(&path))?;
2667        }
2668        self.sync_fts()?;
2669        self.search_with_heal(query, limit, include_generated, false, explain, options)
2670    }
2671
2672    fn stale_hit_paths(&self, hits: &[SearchHit]) -> anyhow::Result<Vec<String>> {
2673        let Some(root) = self.storage.source_root() else {
2674            return Ok(Vec::new());
2675        };
2676        let mut stale = Vec::new();
2677        let mut seen = BTreeSet::new();
2678        for hit in hits {
2679            if !seen.insert(hit.path.clone()) {
2680                continue;
2681            }
2682            let source_path = root.join(&hit.path);
2683            let Ok(text) = fs::read_to_string(source_path) else {
2684                stale.push(hit.path.clone());
2685                continue;
2686            };
2687            let chunk = crate::query::read_chunk(self.storage.connection(), hit.chunk_id)?;
2688            let Some(chunk) = chunk else {
2689                stale.push(hit.path.clone());
2690                continue;
2691            };
2692            let anchor = self.chunk_anchor(hit.chunk_id)?;
2693            let status = anchors::validate(
2694                &chunk.text,
2695                usize::try_from(chunk.start_line).unwrap_or(1),
2696                usize::try_from(chunk.end_line).unwrap_or(1),
2697                &anchor,
2698                &text,
2699            );
2700            if !matches!(status, AnchorStatus::Exact) {
2701                stale.push(hit.path.clone());
2702            }
2703        }
2704        Ok(stale)
2705    }
2706
2707    fn chunk_anchor(&self, chunk_id: i64) -> anyhow::Result<ChunkAnchor> {
2708        Ok(self.storage.connection().query_row(
2709            "
2710            SELECT anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2711                   start_context_hash, end_context_hash, context_radius
2712            FROM chunks WHERE id = ?1
2713            ",
2714            [chunk_id],
2715            |row| {
2716                Ok(ChunkAnchor {
2717                    version: row.get(0)?,
2718                    normalized_hash: row.get(1)?,
2719                    start_boundary_hash: row.get(2)?,
2720                    end_boundary_hash: row.get(3)?,
2721                    start_context_hash: row.get(4)?,
2722                    end_context_hash: row.get(5)?,
2723                    context_radius: row.get(6)?,
2724                })
2725            },
2726        )?)
2727    }
2728
2729    fn mark_file_deleted(&self, path: &Path) -> anyhow::Result<()> {
2730        let path = path_string(path);
2731        self.remove_file_in_scope(Path::new(&path), "", &self.active_worktree_id)?;
2732        self.storage.connection().execute(
2733            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2734             VALUES (?1, 'unknown', 'deleted', '', 0, 0, ?2, '', '', ?3)
2735             ON CONFLICT(path, commit_sha, worktree_id) DO UPDATE SET
2736                kind = 'deleted',
2737                sha256 = '',
2738                modified_at_ms = 0,
2739                indexed_at_ms = excluded.indexed_at_ms",
2740            params![path, now_ms(), self.active_worktree_id],
2741        )?;
2742        self.mark_fts_dirty()?;
2743        Ok(())
2744    }
2745
2746    fn remove_file_in_scope(
2747        &self,
2748        path: &Path,
2749        commit_sha: &str,
2750        worktree_id: &str,
2751    ) -> anyhow::Result<()> {
2752        let path = path_string(path);
2753        self.storage.connection().execute(
2754            "UPDATE edges
2755             SET to_symbol_id = NULL,
2756                 confidence = 'NameOnly'
2757             WHERE to_symbol_id IN (
2758                 SELECT symbols.id FROM symbols
2759                 JOIN main.files ON main.files.id = symbols.file_id
2760                 WHERE main.files.path = ?1
2761                   AND main.files.commit_sha = ?2
2762                   AND main.files.worktree_id = ?3
2763             )",
2764            params![path, commit_sha, worktree_id],
2765        )?;
2766        self.storage.connection().execute(
2767            "DELETE FROM edges
2768             WHERE source_file_id IN (
2769                    SELECT id FROM main.files
2770                    WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2771                )
2772                OR from_symbol_id IN (
2773                    SELECT symbols.id FROM symbols
2774                    JOIN main.files ON main.files.id = symbols.file_id
2775                    WHERE main.files.path = ?1
2776                      AND main.files.commit_sha = ?2
2777                      AND main.files.worktree_id = ?3
2778                )",
2779            params![path, commit_sha, worktree_id],
2780        )?;
2781        self.storage
2782            .connection()
2783            .execute("DELETE FROM parser_failures WHERE path = ?1", [&path])?;
2784        self.storage.connection().execute(
2785            "DELETE FROM chunk_fts
2786             WHERE rowid IN (
2787                 SELECT chunks.id FROM chunks
2788                 JOIN main.files ON main.files.id = chunks.file_id
2789                 WHERE main.files.path = ?1
2790                   AND main.files.commit_sha = ?2
2791                   AND main.files.worktree_id = ?3
2792             )",
2793            params![path, commit_sha, worktree_id],
2794        )?;
2795        self.storage.connection().execute(
2796            "DELETE FROM chunks
2797             WHERE file_id IN (
2798                SELECT id FROM main.files
2799                WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2800             )",
2801            params![path, commit_sha, worktree_id],
2802        )?;
2803        self.storage.connection().execute(
2804            "DELETE FROM symbols
2805             WHERE file_id IN (
2806                SELECT id FROM main.files
2807                WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2808             )",
2809            params![path, commit_sha, worktree_id],
2810        )?;
2811        self.storage.connection().execute(
2812            "DELETE FROM main.files WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3",
2813            params![path, commit_sha, worktree_id],
2814        )?;
2815        self.mark_fts_dirty()?;
2816        Ok(())
2817    }
2818
2819    fn ensure_fts_fresh(&self) -> anyhow::Result<()> {
2820        let content_revision = self.content_revision()?;
2821        let fts_source_revision = self.meta("fts_source_revision")?;
2822        if !self.fts_dirty()? && fts_source_revision.as_deref() == Some(content_revision.as_str()) {
2823            return Ok(());
2824        }
2825        self.rebuild_fts()?;
2826        let refreshed_revision = self.meta("fts_source_revision")?;
2827        if refreshed_revision.as_deref() != Some(content_revision.as_str()) {
2828            anyhow::bail!(
2829                "FTS freshness invariant failed: content_revision={content_revision}, fts_source_revision={}",
2830                refreshed_revision.unwrap_or_else(|| "<missing>".to_string())
2831            );
2832        }
2833        Ok(())
2834    }
2835
2836    fn fts_dirty(&self) -> anyhow::Result<bool> {
2837        Ok(self.meta("fts_dirty")?.as_deref() == Some("true"))
2838    }
2839
2840    fn file_row(&self, path: &Path) -> anyhow::Result<FileRow> {
2841        self.storage
2842            .connection()
2843            .query_row(
2844                "SELECT language, kind FROM files WHERE path = ?1",
2845                [path_string(path)],
2846                |row| {
2847                    let language: String = row.get(0)?;
2848                    let kind: String = row.get(1)?;
2849                    Ok((language, kind))
2850                },
2851            )
2852            .map_err(Into::into)
2853            .and_then(|(language, kind)| {
2854                Ok(FileRow { language: language.parse()?, kind: kind.parse()? })
2855            })
2856    }
2857
2858    fn graph_reindex_files(&self) -> anyhow::Result<Vec<GraphReindexFile>> {
2859        let mut stmt = self
2860            .storage
2861            .connection()
2862            .prepare("SELECT id, path, language, kind FROM files ORDER BY path")?;
2863        let rows = stmt.query_map([], |row| {
2864            let language: String = row.get(2)?;
2865            let kind: String = row.get(3)?;
2866            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?, language, kind))
2867        })?;
2868        let mut files = Vec::new();
2869        for row in rows {
2870            let (id, path, language, kind) = row?;
2871            files.push(GraphReindexFile {
2872                id,
2873                path,
2874                language: language.parse()?,
2875                kind: kind.parse()?,
2876            });
2877        }
2878        Ok(files)
2879    }
2880
2881    fn indexed_files(&self) -> anyhow::Result<Vec<IndexedFile>> {
2882        let mut stmt =
2883            self.storage.connection().prepare("SELECT path, sha256 FROM files ORDER BY path")?;
2884        let rows =
2885            stmt.query_map([], |row| Ok(IndexedFile { path: row.get(0)?, sha256: row.get(1)? }))?;
2886        let mut files = Vec::new();
2887        for row in rows {
2888            files.push(row?);
2889        }
2890        Ok(files)
2891    }
2892
2893    fn indexed_file_count(&self) -> anyhow::Result<usize> {
2894        let count =
2895            self.storage
2896                .connection()
2897                .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2898        Ok(usize::try_from(count).unwrap_or(usize::MAX))
2899    }
2900
2901    fn content_revision(&self) -> anyhow::Result<String> {
2902        let value = self.storage.connection().query_row(
2903            "SELECT COALESCE(string_agg(path || ':' || sha256, ',' ORDER BY path), '') FROM files",
2904            [],
2905            |row| row.get::<_, String>(0),
2906        )?;
2907        Ok(hex_sha256(value.as_bytes()))
2908    }
2909}
2910
2911#[derive(Debug)]
2912struct FileRow {
2913    language: Language,
2914    kind: TargetKind,
2915}
2916
2917#[derive(Debug)]
2918struct GraphReindexFile {
2919    id: i64,
2920    path: String,
2921    language: Language,
2922    kind: TargetKind,
2923}
2924
/// Raw `files` columns used when reporting graph coverage for a single path.
#[derive(Debug)]
struct GraphPathRow {
    /// Stored language name, kept as the raw string.
    language: String,
    /// Content hash recorded when the file was indexed.
    sha256: String,
    /// Revision recorded at index time; an empty string means none was recorded.
    indexed_revision: String,
}
2931
2932fn rank_docs_for_symbol(symbol: &crate::query::symbol::SymbolHit, hits: &mut [SearchHit]) {
2933    let source_module = module_stem(&symbol.path);
2934    let symbol_name = symbol.name.to_ascii_lowercase();
2935    let qualified_name = symbol.qualified_name.to_ascii_lowercase();
2936    hits.sort_by(|a, b| {
2937        let a_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, a);
2938        let b_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, b);
2939        a_rank
2940            .cmp(&b_rank)
2941            .then_with(|| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal))
2942            .then_with(|| a.path.cmp(&b.path))
2943            .then_with(|| a.start_line.cmp(&b.start_line))
2944    });
2945    for (idx, hit) in hits.iter_mut().enumerate() {
2946        hit.score = (10_000usize.saturating_sub(idx)) as f64;
2947    }
2948}
2949
2950fn docs_locality_rank(
2951    symbol: &crate::query::symbol::SymbolHit,
2952    source_module: &str,
2953    symbol_name: &str,
2954    qualified_name: &str,
2955    hit: &SearchHit,
2956) -> u8 {
2957    let path = hit.path.to_ascii_lowercase();
2958    let summary = hit.summary.to_ascii_lowercase();
2959    let hit_symbol = hit.symbol_path.as_deref().unwrap_or_default().to_ascii_lowercase();
2960    if hit.path == symbol.path && hit_symbol == symbol.qualified_name.to_ascii_lowercase() {
2961        return 0;
2962    }
2963    if hit.path == symbol.path {
2964        return 1;
2965    }
2966    if !source_module.is_empty()
2967        && path.contains(source_module)
2968        && (summary.contains(symbol_name) || hit_symbol.contains(symbol_name))
2969    {
2970        return 2;
2971    }
2972    if summary.contains(qualified_name) || hit_symbol.contains(qualified_name) {
2973        return 3;
2974    }
2975    if summary.contains(symbol_name) || hit_symbol.contains(symbol_name) {
2976        return 4;
2977    }
2978    if !source_module.is_empty() && path.contains(source_module) {
2979        return 5;
2980    }
2981    9
2982}
2983
/// Returns the lowercased file stem of `path`, or an empty string when the path has no
/// stem or the stem is not valid UTF-8.
fn module_stem(path: &str) -> String {
    match Path::new(path).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_ascii_lowercase(),
        None => String::new(),
    }
}
2991
2992fn dedupe_search_hits(hits: &mut Vec<SearchHit>) {
2993    let mut seen = BTreeSet::new();
2994    hits.retain(|hit| seen.insert(hit.chunk_id));
2995}
2996
/// Collapses every run of whitespace in `text` to a single space, trims both ends, and
/// keeps at most the first 240 characters of the result.
fn bounded_summary(text: &str) -> String {
    const MAX_CHARS: usize = 240;
    let mut summary = String::new();
    let mut remaining = MAX_CHARS;
    for (position, word) in text.split_whitespace().enumerate() {
        // Words after the first are preceded by exactly one separating space.
        let separator = if position == 0 { "" } else { " " };
        for ch in separator.chars().chain(word.chars()) {
            if remaining == 0 {
                return summary;
            }
            summary.push(ch);
            remaining -= 1;
        }
    }
    summary
}
3000
/// Grouping key that decides which concrete symbols are members of one logical symbol.
///
/// Two symbol rows share a logical symbol exactly when all six fields are equal. The derived
/// ordering compares the fields in declaration order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct LogicalSymbolKey {
    /// Language name of the symbol.
    language: String,
    /// Path of the file that defines the symbol.
    path: String,
    /// Unqualified symbol name.
    name: String,
    /// Qualified symbol name.
    qualified_name: String,
    /// Symbol kind.
    kind: String,
    // The signature takes part in the identity on purpose: two unrelated symbols in one file
    // can share a `qualified_name` (for instance `new` on two different impls) and must stay
    // separate logical symbols, which their differing signatures guarantee. Real cfg variants
    // of one symbol carry the same signature and therefore still end up in one group.
    signature: Option<String>,
}
3013
3014impl LogicalSymbolKey {
3015    fn from(row: &LogicalSymbolMemberRow) -> Self {
3016        Self {
3017            language: row.language.clone(),
3018            path: row.path.clone(),
3019            name: row.name.clone(),
3020            qualified_name: row.qualified_name.clone(),
3021            kind: row.kind.clone(),
3022            signature: row.signature.clone(),
3023        }
3024    }
3025
3026    /// Deterministic logical-symbol id derived from the key, so it is **stable across reindex**
3027    /// (the table is fully rebuilt each pass; an autoincrement rowid would churn the id every
3028    /// time, breaking any cached id or logical-symbol-bound memory). A 63-bit truncation of the
3029    /// key's SHA-256 — collisions are astronomically unlikely across a repo's symbols, and a
3030    /// collision would surface as a loud primary-key error on rebuild rather than silent merging.
3031    fn stable_id(&self) -> i64 {
3032        let canonical = format!(
3033            "{}\u{1f}{}\u{1f}{}\u{1f}{}\u{1f}{}\u{1f}{}",
3034            self.language,
3035            self.path,
3036            self.name,
3037            self.qualified_name,
3038            self.kind,
3039            self.signature.as_deref().unwrap_or(""),
3040        );
3041        let digest = Sha256::digest(canonical.as_bytes());
3042        let mut bytes = [0u8; 8];
3043        bytes.copy_from_slice(&digest[..8]);
3044        (u64::from_be_bytes(bytes) >> 1) as i64
3045    }
3046}
3047
/// One concrete symbol row, together with the line span computed for it, awaiting
/// assignment to a logical symbol.
#[derive(Debug, Clone)]
struct LogicalSymbolMemberRow {
    /// Id of the concrete symbol in `symbols`.
    symbol_id: i64,
    /// Path of the file that defines the symbol.
    path: String,
    /// Language name of the symbol.
    language: String,
    /// Unqualified symbol name.
    name: String,
    /// Qualified symbol name.
    qualified_name: String,
    /// Symbol kind.
    kind: String,
    /// Signature text, when one was recorded.
    signature: Option<String>,
    /// Line number computed for the symbol's start byte.
    start_line: i64,
    /// Line number computed for the symbol's end byte.
    end_line: i64,
}
3060
/// Converts an absolute `byte` offset into a line number, using a chunk that contains it.
///
/// * `text` — text of the chunk.
/// * `chunk_start_byte` — absolute byte offset at which the chunk starts.
/// * `chunk_start_line` — line number of the chunk's first line.
/// * `byte` — absolute byte offset to locate.
///
/// An offset at or before the chunk start maps to the chunk's first line (at least `1`).
/// Otherwise the result is `chunk_start_line` plus the number of newline bytes between the
/// chunk start and the offset; offsets beyond the end of `text` are clamped to its length.
fn symbol_line_for_byte(
    text: &str,
    chunk_start_byte: usize,
    chunk_start_line: i64,
    byte: usize,
) -> i64 {
    if byte <= chunk_start_byte {
        return chunk_start_line.max(1);
    }
    let local = (byte - chunk_start_byte).min(text.len());
    // Count on the raw bytes: `local` comes from stored offsets and need not fall on a UTF-8
    // character boundary, where slicing the `str` itself would panic. A newline is a single
    // byte that never occurs inside a multi-byte sequence, so the count is the same.
    let newlines = text.as_bytes()[..local].iter().filter(|&&value| value == b'\n').count();
    chunk_start_line.saturating_add(i64::try_from(newlines).unwrap_or(i64::MAX))
}
3074
3075fn graph_only_reason(edge: &crate::query::graph::GraphHop, current_line: Option<&str>) -> String {
3076    let Some(line) = current_line else {
3077        return "missing_current_source_line".to_string();
3078    };
3079    if edge
3080        .target_qualified_name
3081        .as_deref()
3082        .is_some_and(|qualified| !qualified.is_empty() && line.contains(qualified))
3083    {
3084        return "qualified_call_pattern_mismatch".to_string();
3085    }
3086    if edge.target.as_deref().is_some_and(|target| !target.is_empty() && line.contains(target)) {
3087        return "imported_or_unqualified_call".to_string();
3088    }
3089    if edge
3090        .evidence
3091        .as_deref()
3092        .is_some_and(|evidence| !evidence.is_empty() && line.contains(evidence.trim()))
3093    {
3094        return "regex_too_narrow".to_string();
3095    }
3096    "stale_or_overbroad_graph_edge".to_string()
3097}
3098
3099fn is_likely_false_positive_graph_only(
3100    edge: &crate::query::graph::GraphHop,
3101    graph_only: &crate::query::graph::GraphOnlyEdge,
3102) -> bool {
3103    if graph_only.likely_reason == "stale_or_overbroad_graph_edge" {
3104        return true;
3105    }
3106    edge.resolution == "target_name_fallback"
3107        || edge.confidence == "name_only"
3108        || edge.confidence == "ambiguous"
3109        || !edge.verified_target_symbol
3110}
3111
3112fn classify_text_only_hit(
3113    path: &str,
3114    text: &str,
3115    parser_failure_paths: &BTreeSet<String>,
3116) -> &'static str {
3117    if parser_failure_paths.contains(path) {
3118        return "parser_failure";
3119    }
3120    if is_generated_path(path) {
3121        return "generated_text_mention";
3122    }
3123    let trimmed = text.trim_start();
3124    if is_comment_like_text(trimmed) {
3125        return "comment_text_mention";
3126    }
3127    if is_import_or_declaration_text(trimmed) {
3128        return "declaration_text_mention";
3129    }
3130    if is_test_like_path(path) && is_test_scaffolding_text(trimmed) {
3131        return "test_scaffolding_text_mention";
3132    }
3133    "parser_call_extraction"
3134}
3135
/// Reports whether a text-only hit classification points at a probable parser gap, i.e.
/// whether `kind` is `parser_call_extraction` or `parser_failure`.
fn is_likely_parser_gap_kind(kind: &str) -> bool {
    const PARSER_GAP_KINDS: [&str; 2] = ["parser_call_extraction", "parser_failure"];
    PARSER_GAP_KINDS.contains(&kind)
}
3139
/// Reports whether `path` looks like generated code.
///
/// A path qualifies when any of its directory components is exactly `generated` or
/// `generated-web`, or when the file name ends in `.d.ts` (which also covers
/// `*_bg.wasm.d.ts` bindings). Directory components are matched wherever they appear, so a
/// repo-relative path that starts with `generated/` is recognised as well.
fn is_generated_path(path: &str) -> bool {
    const GENERATED_DIRS: [&str; 2] = ["generated", "generated-web"];
    // Everything before the last '/' is the directory part; the final segment is the file.
    let in_generated_dir = path
        .rsplit_once('/')
        .is_some_and(|(dirs, _file)| dirs.split('/').any(|dir| GENERATED_DIRS.contains(&dir)));
    in_generated_dir || path.ends_with(".d.ts")
}
3146
/// Reports whether `text` begins with a comment marker: `//`, `/*`, `*` (which includes
/// `*/`) or `#`. The caller is expected to pass text with leading whitespace removed.
fn is_comment_like_text(text: &str) -> bool {
    const COMMENT_PREFIXES: [&str; 4] = ["//", "/*", "*", "#"];
    COMMENT_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
3154
/// Reports whether `text` begins with an import or a type-level declaration keyword:
/// `import`, `export type`, `export interface`, `type`, `interface` or `declare`, each
/// followed by a space.
fn is_import_or_declaration_text(text: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 6] =
        ["import ", "export type ", "export interface ", "type ", "interface ", "declare "];
    DECLARATION_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
3163
/// Reports whether a line from a test file looks like test-framework scaffolding rather
/// than a call into the code under test.
///
/// Mock/Jest markers (`.mock`, `jest.`, `jest<`, `toHaveBeen`) are matched as plain
/// substrings. Framework calls (`expect(`, `describe(`, `it(`, `test(` and their
/// `x`/`f`-prefixed skip/focus forms) must start at an identifier boundary, so ordinary
/// calls that merely end in the same letters — `split(`, `commit(`, `latest(` — are not
/// mistaken for scaffolding.
fn is_test_scaffolding_text(text: &str) -> bool {
    /// True when `call` occurs in `text` and is not the tail of a longer identifier.
    fn has_call(text: &str, call: &str) -> bool {
        text.match_indices(call).any(|(start, _)| {
            let preceded_by_identifier = text[..start]
                .chars()
                .next_back()
                .is_some_and(|prev| prev.is_alphanumeric() || prev == '_' || prev == '$');
            !preceded_by_identifier
        })
    }

    const SUBSTRING_MARKERS: [&str; 4] = [".mock", "jest.", "jest<", "toHaveBeen"];
    const CALL_MARKERS: [&str; 9] = [
        "expect(",
        "describe(",
        "xdescribe(",
        "fdescribe(",
        "it(",
        "xit(",
        "fit(",
        "test(",
        "xtest(",
    ];

    SUBSTRING_MARKERS.iter().any(|marker| text.contains(*marker))
        || CALL_MARKERS.iter().any(|call| has_call(text, call))
}
3174
3175fn recommended_graph_text_fallback(
3176    parser_gaps: &[crate::query::graph::TextOnlyHit],
3177    graph_only_edges: &[crate::query::graph::GraphOnlyEdge],
3178) -> String {
3179    match (parser_gaps.is_empty(), graph_only_edges.is_empty()) {
3180        (false, false) => "both",
3181        (false, true) => "text",
3182        (true, false) => "graph",
3183        (true, true) => "none",
3184    }
3185    .to_string()
3186}
3187
/// Describes how a comparison `pattern` relates to `symbol_name`.
///
/// * `"identifier_or_call"`: the pattern contains a boundary construct (`\b`, `\W`, `[^`) or
///   the symbol name followed by an escaped or plain opening parenthesis.
/// * `"substring_identifier"`: the pattern merely contains the symbol name.
/// * `"regex"`: anything else, including an empty `symbol_name`.
fn compare_pattern_match_mode(pattern: &str, symbol_name: &str) -> String {
    const BOUNDARY_TOKENS: [&str; 3] = ["\\b", "\\W", "[^"];

    let mode = if symbol_name.is_empty() {
        "regex"
    } else {
        let call_forms = [format!("{symbol_name}\\("), format!("{symbol_name}(")];
        let bounded = BOUNDARY_TOKENS.iter().any(|token| pattern.contains(*token))
            || call_forms.iter().any(|call| pattern.contains(call.as_str()));
        if bounded {
            "identifier_or_call"
        } else if pattern.contains(symbol_name) {
            "substring_identifier"
        } else {
            "regex"
        }
    };
    mode.to_string()
}
3207
/// Returns true when `path` looks like it belongs to a test suite.
///
/// The check is ASCII case-insensitive. A path qualifies when any directory component is
/// `test`, `tests` or `__tests__`, or when the file name carries a known test suffix.
/// Directory components are compared whole, so a root-relative path such as
/// `tests/integration.rs` is recognised even though it has no leading slash.
fn is_test_like_path(path: &str) -> bool {
    const TEST_SUFFIXES: [&str; 5] =
        ["_test.rs", ".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx"];

    let lower = path.to_ascii_lowercase();
    if TEST_SUFFIXES.iter().any(|suffix| lower.ends_with(*suffix)) {
        return true;
    }
    let mut components = lower.split('/');
    // The final component is the file name, not a directory.
    let _file_name = components.next_back();
    components.any(|directory| matches!(directory, "test" | "tests" | "__tests__"))
}
3219
/// A file path paired with a content digest.
// NOTE(review): presumably mirrors a `path`/`sha256` row of the `files` table; no use of this
// struct is visible in this part of the file, so confirm against its callers.
#[derive(Debug)]
struct IndexedFile {
    path: String,
    sha256: String,
}
3225
/// One file selected for indexing, together with the target settings that selected it.
#[derive(Debug, Clone)]
struct IndexFile {
    /// Location of the file on disk.
    full_path: PathBuf,
    /// `full_path` relative to the configured root.
    relative_path: PathBuf,
    /// Language of the target that claimed the file.
    language: Language,
    /// Kind of the target that claimed the file.
    kind: TargetKind,
    /// Left empty by the collectors in this file.
    // NOTE(review): presumably filled in later from a `FileScope` — confirm.
    commit_sha: String,
    /// Left empty by the collectors in this file.
    worktree_id: String,
}
3235
/// Scope key for indexed rows: either a commit or a worktree.
///
/// Exactly one of the two fields is populated by the constructors below; the other is left as
/// an empty string.
#[derive(Debug, Clone)]
struct FileScope {
    commit_sha: String,
    worktree_id: String,
}

impl FileScope {
    /// Scope identified by `commit_sha`, with an empty worktree id.
    fn commit(commit_sha: String) -> Self {
        let worktree_id = String::new();
        FileScope { commit_sha, worktree_id }
    }

    /// Scope identified by `worktree_id`, with an empty commit sha.
    fn worktree(worktree_id: String) -> Self {
        let commit_sha = String::new();
        FileScope { commit_sha, worktree_id }
    }
}
3251
/// A file paired with the outcome of preparing its content for indexing.
#[derive(Debug)]
struct PreparedIndexFile {
    /// The file that was prepared.
    file: IndexFile,
    /// Prepared content, or the error that stopped preparation of this file.
    prepared: anyhow::Result<PreparedIndexContent>,
}
3257
/// Everything derived from a single file's text before it is written to the index.
#[derive(Debug)]
struct PreparedIndexContent {
    /// File modification time in milliseconds since the Unix epoch.
    modified_at_ms: i64,
    /// Full file contents.
    text: String,
    /// Lowercase hex SHA-256 of `text`.
    sha256: String,
    /// Chunks produced by the chunker for this file.
    chunks: Vec<Chunk>,
    /// Extracted symbols; empty for generated or oversized files.
    symbols: Vec<Symbol>,
    /// Parser error message, when the structural parse reported one.
    parser_failure: Option<String>,
}
3267
/// Result of comparing the files on disk with the files already recorded in the index.
#[derive(Debug)]
struct DiscoveryPlan {
    /// Files that need (re)indexing: the union of `unindexed` and changed files.
    files: Vec<IndexFile>,
    /// Indexed paths that no longer exist among the discovered files.
    deleted: BTreeSet<PathBuf>,
    /// Discovered files with no row in the index yet.
    unindexed: Vec<IndexFile>,
    /// Relative paths whose content hash differs from the indexed hash.
    changed: Vec<PathBuf>,
    /// Number of files found on disk.
    discovered_files: usize,
    /// Number of files the index knew about before this plan was computed.
    indexed_files: usize,
}
3277
/// Paths reported by git status, split by whether the path still exists on disk.
///
/// Paths are relative to the configured root.
#[derive(Debug, Default)]
struct GitChangedPaths {
    /// Reported paths that exist on disk.
    changed: BTreeSet<PathBuf>,
    /// Reported paths that no longer exist on disk.
    deleted: BTreeSet<PathBuf>,
}
3283
3284fn collect_index_files(config: &Config) -> anyhow::Result<Vec<IndexFile>> {
3285    let mut targets = config.targets.iter().collect::<Vec<_>>();
3286    targets.sort_by_key(|target| match target.kind {
3287        TargetKind::Generated => 0,
3288        TargetKind::Tests => 1,
3289        TargetKind::Docs => 2,
3290        TargetKind::Source => 3,
3291    });
3292    let mut seen = BTreeSet::new();
3293    let mut files = Vec::new();
3294
3295    for target in targets {
3296        for file in walker::walk_target(&config.root, target)? {
3297            let relative_path = file.strip_prefix(&config.root)?.to_path_buf();
3298            if !seen.insert(relative_path.clone()) {
3299                continue;
3300            }
3301            files.push(IndexFile {
3302                full_path: file,
3303                relative_path,
3304                language: target.language,
3305                kind: target.kind,
3306                commit_sha: String::new(),
3307                worktree_id: String::new(),
3308            });
3309        }
3310    }
3311
3312    Ok(files)
3313}
3314
3315fn collect_changed_index_files(
3316    config: &Config,
3317    changes: &GitChangedPaths,
3318) -> anyhow::Result<Vec<IndexFile>> {
3319    let mut files = Vec::new();
3320    for relative_path in &changes.changed {
3321        let full_path = config.root.join(relative_path);
3322        if !full_path.is_file() {
3323            continue;
3324        }
3325        let Some((language, kind)) = target_for_path(config, relative_path) else {
3326            continue;
3327        };
3328        files.push(IndexFile {
3329            full_path,
3330            relative_path: relative_path.clone(),
3331            language,
3332            kind,
3333            commit_sha: String::new(),
3334            worktree_id: String::new(),
3335        });
3336    }
3337    Ok(files)
3338}
3339
3340fn spawn_git_history_prepare(
3341    root: &Path,
3342) -> JoinHandle<anyhow::Result<git_history::PreparedGitHistory>> {
3343    let root = root.to_path_buf();
3344    thread::spawn(move || git_history::prepare(&root))
3345}
3346
3347fn join_git_history_prepare(
3348    handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
3349) -> anyhow::Result<git_history::PreparedGitHistory> {
3350    handle.join().map_err(|_| anyhow::anyhow!("git history preparation panicked"))?
3351}
3352
3353fn prepare_index_file(file: &IndexFile) -> PreparedIndexFile {
3354    PreparedIndexFile { file: file.clone(), prepared: prepare_index_content(file) }
3355}
3356
/// Prepares every file in `files` in parallel while forwarding throttled progress events to
/// `progress` on the calling thread.
///
/// Returns one `PreparedIndexFile` per input file; per-file failures are carried inside each
/// entry rather than aborting the batch.
///
/// # Errors
/// Fails only when the worker thread panics.
fn prepare_files_with_progress<F>(
    files: &[IndexFile],
    progress: &mut F,
) -> anyhow::Result<Vec<PreparedIndexFile>>
where
    F: FnMut(IndexProgress),
{
    // Message sent from the worker to the calling thread for each reported file.
    #[derive(Debug)]
    struct PreparedProgress {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    }

    let total = files.len();
    let prepared = thread::scope(|scope| {
        let (tx, rx) = mpsc::channel();
        let completed = AtomicUsize::new(0);
        // The worker closure takes ownership of the sender, so the channel closes (and the
        // receive loop below ends) once preparation has finished.
        let handle = scope.spawn(move || {
            files
                .par_iter()
                .map(|file| {
                    let prepared = prepare_index_file(file);
                    // `current` is the completion count, not the file's position in `files`.
                    let current = completed.fetch_add(1, Ordering::Relaxed) + 1;
                    if should_report_file_progress(current, total) {
                        // A failed send is deliberately ignored.
                        let _ = tx.send(PreparedProgress {
                            current,
                            total,
                            path: file.relative_path.clone(),
                            language: file.language,
                            kind: file.kind,
                        });
                    }
                    prepared
                })
                .collect::<Vec<_>>()
        });

        // Relay events on this thread, where the `progress` callback lives.
        for event in rx {
            progress(IndexProgress::PreparingFile {
                current: event.current,
                total: event.total,
                path: event.path,
                language: event.language,
                kind: event.kind,
            });
        }

        handle.join().map_err(|_| anyhow::anyhow!("parallel file preparation panicked"))
    })?;
    Ok(prepared)
}
3411
/// Decides whether finishing file number `current` out of `total` deserves a progress event.
///
/// Events are emitted for the first file, the last file, and whenever the completed share
/// crosses into a new tenth of `total`. Nothing is reported when `total` is zero.
fn should_report_file_progress(current: usize, total: usize) -> bool {
    if total == 0 {
        return false;
    }
    if current == 1 || current == total {
        return true;
    }
    let tenth_reached = |done: usize| done.saturating_mul(10) / total;
    tenth_reached(current) != tenth_reached(current.saturating_sub(1))
}
3421
3422fn prepare_index_content(file: &IndexFile) -> anyhow::Result<PreparedIndexContent> {
3423    let text = fs::read_to_string(&file.full_path)?;
3424    let modified_at_ms = file_metadata_ms(&file.full_path)?;
3425    let sha256 = hex_sha256(text.as_bytes());
3426    let parser_failure =
3427        if file.language != Language::Markdown && file.kind != TargetKind::Generated {
3428            if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3429                None
3430            } else {
3431                parser::parse_error(&file.relative_path, file.language, &text)
3432                    .unwrap_or_else(|err| Some(err.to_string()))
3433            }
3434        } else {
3435            None
3436        };
3437    let chunks = if file.kind == TargetKind::Generated {
3438        chunker::generated_chunks_for_file(&file.relative_path, &text)
3439    } else {
3440        chunker::chunks_for_file(&file.relative_path, file.language, &text)
3441    };
3442    let symbols =
3443        if file.kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3444            Vec::new()
3445        } else {
3446            symbols::symbols_for_file(&file.relative_path, file.language, &text)
3447        };
3448    Ok(PreparedIndexContent { modified_at_ms, text, sha256, chunks, symbols, parser_failure })
3449}
3450
3451fn discovery_plan(conn: &rusqlite::Connection, config: &Config) -> anyhow::Result<DiscoveryPlan> {
3452    let discovered = collect_index_files(config)?;
3453    let mut indexed = indexed_file_map(conn)?;
3454    let mut current_paths = BTreeSet::new();
3455    let mut files = Vec::new();
3456    let mut unindexed = Vec::new();
3457    let mut changed = Vec::new();
3458    let discovered_files = discovered.len();
3459    let hashed = discovered
3460        .par_iter()
3461        .map(|file| -> anyhow::Result<(IndexFile, String)> {
3462            let text = fs::read(&file.full_path)?;
3463            Ok((file.clone(), hex_sha256(&text)))
3464        })
3465        .collect::<Vec<_>>();
3466
3467    for hashed_file in hashed {
3468        let (file, current_hash) = hashed_file?;
3469        let relative = path_string(&file.relative_path);
3470        current_paths.insert(file.relative_path.clone());
3471        let Some(indexed_hash) = indexed.remove(&relative) else {
3472            unindexed.push(file.clone());
3473            files.push(file);
3474            continue;
3475        };
3476        if current_hash != indexed_hash {
3477            changed.push(file.relative_path.clone());
3478            files.push(file);
3479        }
3480    }
3481
3482    let deleted = indexed
3483        .into_keys()
3484        .map(PathBuf::from)
3485        .filter(|path| !current_paths.contains(path))
3486        .collect::<BTreeSet<_>>();
3487
3488    Ok(DiscoveryPlan {
3489        discovered_files,
3490        indexed_files: current_paths
3491            .len()
3492            .saturating_add(deleted.len())
3493            .saturating_sub(unindexed.len()),
3494        files,
3495        deleted,
3496        unindexed,
3497        changed,
3498    })
3499}
3500
3501fn indexed_file_map(conn: &rusqlite::Connection) -> anyhow::Result<BTreeMap<String, String>> {
3502    let mut stmt = conn.prepare("SELECT path, sha256 FROM files ORDER BY path")?;
3503    let rows =
3504        stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?;
3505    let mut files = BTreeMap::new();
3506    for row in rows {
3507        let (path, sha256) = row?;
3508        files.insert(path, sha256);
3509    }
3510    Ok(files)
3511}
3512
3513pub(crate) fn target_for_path(
3514    config: &Config,
3515    relative_path: &Path,
3516) -> Option<(Language, TargetKind)> {
3517    let relative = path_string(relative_path);
3518    let language = Language::from_path(relative_path)?;
3519    let mut targets = config.targets.iter().collect::<Vec<_>>();
3520    targets.sort_by_key(|target| match target.kind {
3521        TargetKind::Generated => 0,
3522        TargetKind::Tests => 1,
3523        TargetKind::Docs => 2,
3524        TargetKind::Source => 3,
3525    });
3526    targets.into_iter().find_map(|target| {
3527        if target.language != language {
3528            return None;
3529        }
3530        if !target.directories.iter().any(|directory| {
3531            directory.as_os_str().is_empty()
3532                || directory == Path::new(".")
3533                || relative_path.starts_with(directory)
3534        }) {
3535            return None;
3536        }
3537        if target.exclude.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3538            return None;
3539        }
3540        if !target.include.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3541            return None;
3542        }
3543        Some((target.language, target.kind))
3544    })
3545}
3546
3547fn git_changed_paths(root: &Path) -> anyhow::Result<GitChangedPaths> {
3548    let repo = gix::discover(root)?;
3549    let worktree_root = repo
3550        .workdir()
3551        .ok_or_else(|| anyhow::anyhow!("git repository has no worktree"))?
3552        .to_path_buf();
3553    let pathspec = config_root_pathspec(&worktree_root, root);
3554    let mut paths = GitChangedPaths::default();
3555
3556    for item in repo
3557        .status(gix::progress::Discard)?
3558        .untracked_files(UntrackedFiles::Files)
3559        .tree_index_track_renames(tree_index::TrackRenames::Disabled)
3560        .into_iter([pathspec])?
3561    {
3562        let item = item?;
3563        let Some(path) = repo_relative_path_to_config_path(&worktree_root, root, item.location())
3564        else {
3565            continue;
3566        };
3567        if root.join(&path).exists() {
3568            if !paths.deleted.contains(&path) {
3569                paths.changed.insert(path);
3570            }
3571        } else {
3572            paths.changed.remove(&path);
3573            paths.deleted.insert(path);
3574        }
3575    }
3576
3577    Ok(paths)
3578}
3579
3580fn repo_relative_path_to_config_path(
3581    worktree_root: &Path,
3582    config_root: &Path,
3583    repo_relative_path: &gix::bstr::BStr,
3584) -> Option<PathBuf> {
3585    let path = PathBuf::from(repo_relative_path.to_str_lossy().as_ref());
3586    worktree_root.join(path).strip_prefix(config_root).ok().map(Path::to_path_buf)
3587}
3588
3589fn config_root_pathspec(worktree_root: &Path, config_root: &Path) -> BString {
3590    let relative = config_root.strip_prefix(worktree_root).unwrap_or_else(|_| Path::new(""));
3591    let relative = path_string(relative);
3592    if relative.is_empty() || relative == "." {
3593        BString::from("*")
3594    } else {
3595        BString::from(format!("{relative}/**"))
3596    }
3597}
3598
/// Matches a `/`-separated relative `path` against a small subset of glob syntax.
///
/// Supported forms:
/// * `**/*.ext`: the path ends with `.ext`.
/// * `dir/**`: the path is `dir` or lies under the directory `dir`. The prefix must end on a
///   component boundary, so `src/**` does not match `src2/lib.rs`.
/// * `**/dir/**`: the directory sequence `dir` appears anywhere in the path, starting on a
///   component boundary.
/// * anything else: exact equality, or the pattern with its surrounding `*` removed occurs as
///   a substring of the path.
fn matches_simple_pattern(path: &str, pattern: &str) -> bool {
    /// True when `directory` occurs in `path` as whole leading components followed by `/`.
    /// With `anchored` the occurrence must start at the beginning of `path`; otherwise it may
    /// start after any `/`.
    fn contains_directory(path: &str, directory: &str, anchored: bool) -> bool {
        path.match_indices(directory).any(|(start, _)| {
            let starts_component = start == 0 || (!anchored && path[..start].ends_with('/'));
            starts_component && path[start + directory.len()..].starts_with('/')
        })
    }

    if let Some(extension) = pattern.strip_prefix("**/*.") {
        return path.strip_suffix(extension).is_some_and(|stem| stem.ends_with('.'));
    }
    if let Some(directory) = pattern.strip_suffix("/**") {
        return match directory.strip_prefix("**/") {
            Some(floating) => contains_directory(path, floating, false),
            // An empty prefix (pattern `/**`) keeps matching every path.
            None => {
                directory.is_empty()
                    || path == directory
                    || contains_directory(path, directory, true)
            }
        };
    }
    path == pattern || path.contains(pattern.trim_matches('*'))
}
3608
3609fn meta_for(conn: &rusqlite::Connection, key: &str) -> anyhow::Result<Option<String>> {
3610    Ok(conn
3611        .query_row("SELECT value FROM index_meta WHERE key = ?1", [key], |row| row.get(0))
3612        .optional()?)
3613}
3614
/// Runs `git` with `args` inside `root` and returns its trimmed standard output.
///
/// Returns `None` when the process cannot be started or exits unsuccessfully. Output that is
/// not valid UTF-8 is converted lossily.
fn git_output(root: &Path, args: &[&str]) -> Option<String> {
    let finished = Command::new("git").current_dir(root).args(args).output().ok()?;
    finished
        .status
        .success()
        .then(|| String::from_utf8_lossy(&finished.stdout).trim().to_string())
}
3622
3623fn resolve_git_context(root: &Path) -> (String, String) {
3624    let commit_sha =
3625        git_output(root, &["rev-parse", "HEAD"]).map(|s| s.trim().to_string()).unwrap_or_default();
3626    let worktree_id = root.to_string_lossy().trim_end_matches('/').to_string();
3627    (commit_sha, worktree_id)
3628}
3629
3630/// The live (commit_sha, worktree_id) keys across every worktree that shares this repo, from
3631/// `git worktree list --porcelain`. Each worktree contributes its HEAD commit (for clean rows)
3632/// and its path (for dirty/overlay rows). Returns empty vecs outside a git worktree.
3633fn live_worktree_contexts(root: &Path) -> (Vec<String>, Vec<String>) {
3634    let mut commits = Vec::new();
3635    let mut worktrees = Vec::new();
3636    let Some(output) = git_output(root, &["worktree", "list", "--porcelain"]) else {
3637        return (commits, worktrees);
3638    };
3639    for line in output.lines() {
3640        if let Some(path) = line.strip_prefix("worktree ") {
3641            worktrees.push(path.trim().trim_end_matches('/').to_string());
3642        } else if let Some(sha) = line.strip_prefix("HEAD ") {
3643            commits.push(sha.trim().to_string());
3644        }
3645    }
3646    (commits, worktrees)
3647}
3648
3649fn table_row_count(conn: &rusqlite::Connection, table: &str) -> anyhow::Result<u64> {
3650    // `table` is always an internal string literal, never user input.
3651    let count = conn
3652        .query_row(&format!("SELECT COUNT(*) FROM main.{table}"), [], |row| row.get::<_, i64>(0))?;
3653    Ok(u64::try_from(count).unwrap_or(0))
3654}
3655
3656fn file_metadata_ms(path: &Path) -> anyhow::Result<i64> {
3657    let modified = fs::metadata(path)?.modified()?;
3658    Ok(duration_ms(modified.duration_since(UNIX_EPOCH)?))
3659}
3660
3661fn now_ms() -> i64 {
3662    duration_ms(SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default())
3663}
3664
/// Converts `duration` to whole milliseconds, clamping to `i64::MAX` when it does not fit.
fn duration_ms(duration: std::time::Duration) -> i64 {
    match i64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => i64::MAX,
    }
}
3668
3669fn hex_sha256(bytes: &[u8]) -> String {
3670    let hash = Sha256::digest(bytes);
3671    let mut out = String::with_capacity(hash.len() * 2);
3672    for byte in hash {
3673        use std::fmt::Write as _;
3674        let _ = write!(out, "{byte:02x}");
3675    }
3676    out
3677}
3678
/// Renders `path` as a string with every backslash replaced by a forward slash.
///
/// Non-UTF-8 parts of the path are converted lossily.
fn path_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
3682
3683#[cfg(test)]
3684mod schema_bootstrap_tests {
3685    use std::sync::atomic::{AtomicU64, Ordering};
3686
3687    use super::*;
3688    use crate::config::ResolvedTarget;
3689
3690    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
3691
    /// Rebuilding against a target directory with no files must still create the database and
    /// its full schema.
    // NOTE(review): `table_count`, `file_columns`, `chunk_columns`, `table_columns` and
    // `indexed_revision_count` are helpers defined outside the visible part of the module.
    // `table_count(..) == 1` presumably asserts that exactly one schema object with that name
    // exists — confirm against the helper.
    #[test]
    fn rebuild_bootstraps_sqlite_schema_for_empty_target_root() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        let docs = root.join("docs");
        fs::create_dir_all(&docs).unwrap();

        // A single Markdown target pointing at the (empty) docs directory.
        let config = Config {
            root: root.clone(),
            database: root.join(".rag-rat/index.sqlite"),
            targets: vec![ResolvedTarget {
                name: "markdown".to_string(),
                language: Language::Markdown,
                directories: vec![PathBuf::from("docs")],
                include: vec!["**/*.md".to_string()],
                exclude: Vec::new(),
                kind: TargetKind::Docs,
            }],
            local_ai: Default::default(),
            watch: Default::default(),
        };

        let db = IndexDatabase::rebuild(&config).unwrap();
        assert!(config.database.exists());
        // Core, git-history and local-AI tables.
        assert_eq!(table_count(&db, "files"), 1);
        assert_eq!(table_count(&db, "chunks"), 1);
        assert_eq!(table_count(&db, "symbols"), 1);
        assert_eq!(table_count(&db, "parser_failures"), 1);
        assert_eq!(table_count(&db, "index_meta"), 1);
        assert_eq!(table_count(&db, "chunk_fts"), 1);
        assert_eq!(table_count(&db, "git_commits"), 1);
        assert_eq!(table_count(&db, "git_file_changes"), 1);
        assert_eq!(table_count(&db, "git_chunk_blame"), 1);
        assert_eq!(table_count(&db, "commit_fts"), 1);
        assert_eq!(table_count(&db, "ai_models"), 1);
        assert_eq!(table_count(&db, "chunk_embeddings"), 1);
        assert_eq!(table_count(&db, "chunk_summaries"), 1);
        assert_eq!(table_count(&db, "reconcile_meta"), 1);
        assert_eq!(table_count(&db, "reconcile_attempts"), 1);
        // Columns expected on the file and chunk tables.
        assert!(file_columns(&db).contains(&"indexed_revision".to_string()));
        assert_eq!(indexed_revision_count(&db), 0);
        assert!(chunk_columns(&db).contains(&"anchor_version".to_string()));
        assert!(chunk_columns(&db).contains(&"normalized_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"start_boundary_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"end_boundary_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"source_revision".to_string()));
        // Columns expected on the embedding table.
        let embedding_columns = table_columns(&db, "chunk_embeddings");
        assert!(embedding_columns.contains(&"model_version".to_string()));
        assert!(embedding_columns.contains(&"input_hash".to_string()));
        assert!(embedding_columns.contains(&"embedding_text_version".to_string()));
        assert!(embedding_columns.contains(&"embedding_policy".to_string()));
        assert!(embedding_columns.contains(&"embedding_priority".to_string()));
        assert!(embedding_columns.contains(&"input_chars".to_string()));
        assert!(embedding_columns.contains(&"input_truncated".to_string()));
        assert!(embedding_columns.contains(&"attempt_count".to_string()));
        assert!(embedding_columns.contains(&"next_retry_after_ms".to_string()));
        assert!(embedding_columns.contains(&"computed_at_ms".to_string()));
        // Columns expected on the edge table.
        let edge_columns = table_columns(&db, "edges");
        assert!(edge_columns.contains(&"source_start_line".to_string()));
        assert!(edge_columns.contains(&"source_end_line".to_string()));
        assert!(edge_columns.contains(&"source_start_byte".to_string()));
        assert!(edge_columns.contains(&"source_end_byte".to_string()));
        assert!(edge_columns.contains(&"target_start_line".to_string()));
        assert!(edge_columns.contains(&"target_end_line".to_string()));
        assert!(edge_columns.contains(&"target_qualified_name".to_string()));
        assert!(edge_columns.contains(&"evidence".to_string()));
        assert!(edge_columns.contains(&"receiver_hint".to_string()));
        assert!(edge_columns.contains(&"resolution".to_string()));
        // Columns expected on the logical-symbol, GitHub sync and symbol-fact tables.
        let logical_columns = table_columns(&db, "logical_symbols");
        assert!(logical_columns.contains(&"qualified_name".to_string()));
        assert!(logical_columns.contains(&"variant_count".to_string()));
        let member_columns = table_columns(&db, "logical_symbol_members");
        assert!(member_columns.contains(&"symbol_id".to_string()));
        assert!(member_columns.contains(&"signature_hash".to_string()));
        let github_ref_sync_columns = table_columns(&db, "github_ref_sync");
        assert!(github_ref_sync_columns.contains(&"status".to_string()));
        assert!(github_ref_sync_columns.contains(&"last_error".to_string()));
        let symbol_fact_columns = table_columns(&db, "symbol_facts");
        assert!(symbol_fact_columns.contains(&"fact_kind".to_string()));
        assert!(symbol_fact_columns.contains(&"fact_value".to_string()));
        // The freshly built database must report the latest schema version.
        assert_eq!(
            db.status(&config.database).unwrap().schema.current_version,
            schema::LATEST_SCHEMA_VERSION
        );

        fs::remove_dir_all(root).unwrap();
    }
3779
    /// A rebuild of a one-file Rust project must emit at least one `PreparingFile` and one
    /// `IndexingFile` progress event.
    // NOTE(review): `source_config` is a helper defined outside the visible part of the module.
    #[test]
    fn rebuild_reports_file_preparation_progress() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn exported() {}\n").unwrap();

        let config = source_config(root.clone(), Language::Rust);
        // Record every progress event the rebuild reports.
        let mut events = Vec::new();
        IndexDatabase::rebuild_with_progress(&config, |progress| events.push(progress)).unwrap();

        assert!(
            events.iter().any(|event| matches!(event, IndexProgress::PreparingFile { .. })),
            "missing preparing progress event: {events:?}"
        );
        assert!(
            events.iter().any(|event| matches!(event, IndexProgress::IndexingFile { .. })),
            "missing indexing progress event: {events:?}"
        );

        fs::remove_dir_all(root).unwrap();
    }
3802
3803    #[test]
3804    fn file_progress_reports_first_final_and_decile_boundaries() {
3805        let reported = (1..=100)
3806            .filter(|current| should_report_file_progress(*current, 100))
3807            .collect::<Vec<_>>();
3808        assert_eq!(reported, vec![1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]);
3809    }
3810
    /// A database whose `schema_version` table is missing must be reported as older, refuse to
    /// open with a hint to migrate, and become usable again after migrating.
    #[test]
    fn compatible_open_requires_recorded_schema_version() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join(".rag-rat")).unwrap();
        let database = root.join(".rag-rat/index.sqlite");
        IndexDatabase::migrate(&database).unwrap();
        // Remove the version bookkeeping while leaving the rest of the schema in place.
        let conn = rusqlite::Connection::open(&database).unwrap();
        conn.execute_batch("DROP TABLE schema_version;").unwrap();
        drop(conn);

        let status = IndexDatabase::migration_check(&database).unwrap();
        assert_eq!(status.state, schema::SchemaState::Older);
        let err = IndexDatabase::open(&database).unwrap_err().to_string();
        assert!(err.contains("run `rag-rat migrate`"), "{err}");

        // Migrating again must restore a compatible, openable database.
        let migrated = IndexDatabase::migrate(&database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        IndexDatabase::open(&database).unwrap();

        fs::remove_dir_all(root).unwrap();
    }
3833
    /// Migrating a hand-built legacy schema (with a minimal `edges` table) must add the newer
    /// edge columns and the indexes over the edge name columns.
    // NOTE(review): the legacy schema below has no `schema_version` table; the test presumably
    // guards against creating an index on `from_name`/`to_name` before those columns exist —
    // confirm against the migration code.
    #[test]
    fn migrate_adds_edge_name_columns_before_indexing_them() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join(".rag-rat")).unwrap();
        let database = root.join(".rag-rat/index.sqlite");
        let conn = rusqlite::Connection::open(&database).unwrap();
        conn.execute_batch(
            "
            CREATE TABLE files(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                path TEXT NOT NULL UNIQUE,
                language TEXT NOT NULL,
                kind TEXT NOT NULL,
                sha256 TEXT NOT NULL,
                modified_at_ms INTEGER NOT NULL,
                generated INTEGER NOT NULL DEFAULT 0,
                indexed_at_ms INTEGER NOT NULL
            );
            CREATE TABLE chunks(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                chunk_kind TEXT NOT NULL,
                symbol_path TEXT,
                start_byte INTEGER NOT NULL,
                end_byte INTEGER NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                text TEXT NOT NULL,
                text_hash TEXT NOT NULL
            );
            CREATE TABLE symbols(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                language TEXT NOT NULL,
                name TEXT NOT NULL,
                qualified_name TEXT NOT NULL,
                kind TEXT NOT NULL,
                start_byte INTEGER NOT NULL,
                end_byte INTEGER NOT NULL,
                signature TEXT,
                docs TEXT
            );
            CREATE TABLE edges(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                from_symbol_id INTEGER,
                to_symbol_id INTEGER,
                edge_kind TEXT NOT NULL,
                confidence TEXT NOT NULL
            );
            ",
        )
        .unwrap();
        drop(conn);

        let migrated = IndexDatabase::migrate(&database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        let db = IndexDatabase::open(&database).unwrap();
        // The migrated `edges` table must carry the name and location columns.
        let columns = table_columns(&db, "edges");
        assert!(columns.contains(&"from_name".to_string()));
        assert!(columns.contains(&"to_name".to_string()));
        assert!(columns.contains(&"source_start_line".to_string()));
        assert!(columns.contains(&"source_end_line".to_string()));
        assert!(columns.contains(&"source_start_byte".to_string()));
        assert!(columns.contains(&"source_end_byte".to_string()));
        assert!(columns.contains(&"target_start_line".to_string()));
        assert!(columns.contains(&"target_end_line".to_string()));
        // Both name indexes must exist after migration.
        assert_eq!(table_count(&db, "idx_edges_from_name"), 1);
        assert_eq!(table_count(&db, "idx_edges_to_name"), 1);

        fs::remove_dir_all(root).unwrap();
    }
3906
    /// Re-running a migration must not discard GitHub data that was already synced into the
    /// database, and the synced issue must remain searchable.
    // NOTE(review): `markdown_config`, `MockGitHubClient` and `row_count` are defined outside
    // the visible part of the module. Deleting the `010_symbol_facts` row presumably makes the
    // database look one migration behind so that `migrate` has work to do — confirm.
    #[test]
    fn migrate_preserves_github_papertrail_cache() {
        let (root, config) =
            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
        let db = IndexDatabase::rebuild(&config).unwrap();
        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
            .unwrap();
        // Baseline: what the mock sync stored.
        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        db.storage
            .connection()
            .execute("DELETE FROM schema_version WHERE id = ?1", ["010_symbol_facts"])
            .unwrap();
        drop(db);

        let migrated = IndexDatabase::migrate(&config.database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        let db = IndexDatabase::open(&config.database).unwrap();
        // The same counts must survive the migration.
        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        let hits = db.github_issue_search("sqlite", 10).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].number, 42);

        fs::remove_dir_all(root).unwrap();
    }
3943
    /// A second full rebuild must keep the GitHub data synced after the first rebuild, and the
    /// synced issue must remain searchable.
    // NOTE(review): `markdown_config`, `MockGitHubClient` and `row_count` are defined outside
    // the visible part of the module.
    #[test]
    fn full_rebuild_preserves_github_papertrail_cache() {
        let (root, config) =
            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
        let db = IndexDatabase::rebuild(&config).unwrap();
        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
            .unwrap();
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        drop(db);

        // Rebuild from scratch over the existing database file.
        let db = IndexDatabase::rebuild(&config).unwrap();

        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_ref_sync"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        let hits = db.github_issue_search("sqlite", 10).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].number, 42);

        fs::remove_dir_all(root).unwrap();
    }
3971
3972    #[test]
3973    fn full_rebuild_preserves_installed_model_manifest() {
3974        let (root, config) = markdown_config("alpha token with enough detail for embeddings\n");
3975        let db = IndexDatabase::rebuild(&config).unwrap();
3976        db.install_model(ai::HASH_MODEL_ID).unwrap();
3977        let before = db.local_ai_status().unwrap();
3978        assert_eq!(before.embedding.model_id, ai::HASH_MODEL_ID);
3979        assert!(before.embedding.installed);
3980        drop(db);
3981
3982        let db = IndexDatabase::rebuild(&config).unwrap();
3983
3984        let after = db.local_ai_status().unwrap();
3985        assert_eq!(after.embedding.model_id, ai::HASH_MODEL_ID);
3986        assert!(after.embedding.installed);
3987        assert_eq!(after.embedding.state, "Ready");
3988
3989        fs::remove_dir_all(root).unwrap();
3990    }
3991
3992    #[test]
3993    fn full_rebuild_preserves_other_worktree_contexts() {
3994        let root = unique_temp_root();
3995        let _ = fs::remove_dir_all(&root);
3996        fs::create_dir_all(root.join("src")).unwrap();
3997        fs::write(root.join("src/lib.rs"), "pub fn current_context() {}\n").unwrap();
3998        let config = source_config(root.clone(), Language::Rust);
3999        let db = IndexDatabase::rebuild(&config).unwrap();
4000        let other_file_id = db
4001            .storage
4002            .connection()
4003            .query_row(
4004                "
4005                INSERT INTO main.files(
4006                    path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms,
4007                    indexed_revision, commit_sha, worktree_id
4008                )
4009                VALUES ('src/other.rs', 'rust', 'source', 'other-sha', 0, 0, 1, 'other-sha', '', 'other-worktree')
4010                RETURNING id
4011                ",
4012                [],
4013                |row| row.get::<_, i64>(0),
4014            )
4015            .unwrap();
4016        let other_chunk_id = db
4017            .storage
4018            .connection()
4019            .query_row(
4020                "
4021                INSERT INTO main.chunks(
4022                    file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line,
4023                    text, text_hash, source_revision, anchor_version, normalized_hash,
4024                    start_boundary_hash, end_boundary_hash, start_context_hash, end_context_hash,
4025                    context_radius, embedding_policy, embedding_priority
4026                )
4027                VALUES (?1, 'symbol', 'other_context', 0, 12, 1, 1, 'other context', 'other-text',
4028                    'other-sha', 1, '', '', '', '', '', 2, 'Embed', 1)
4029                RETURNING id
4030                ",
4031                [other_file_id],
4032                |row| row.get::<_, i64>(0),
4033            )
4034            .unwrap();
4035        db.storage
4036            .connection()
4037            .execute(
4038                "
4039                INSERT INTO main.symbols(
4040                    file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs
4041                )
4042                VALUES (?1, 'rust', 'other_context', 'other_context', 'function', 0, 12, NULL, NULL)
4043                ",
4044                [other_file_id],
4045            )
4046            .unwrap();
4047        db.storage
4048            .connection()
4049            .execute(
4050                "INSERT INTO main.chunk_fts(rowid, text) VALUES (?1, 'other context')",
4051                [other_chunk_id],
4052            )
4053            .unwrap();
4054        drop(db);
4055
4056        let db = IndexDatabase::rebuild(&config).unwrap();
4057
4058        assert_eq!(
4059            db.storage
4060                .connection()
4061                .query_row(
4062                    "SELECT COUNT(*) FROM main.files WHERE worktree_id = 'other-worktree'",
4063                    [],
4064                    |row| row.get::<_, i64>(0)
4065                )
4066                .unwrap(),
4067            1
4068        );
4069        assert_eq!(
4070            db.storage
4071                .connection()
4072                .query_row(
4073                    "SELECT COUNT(*) FROM main.chunks WHERE file_id = ?1",
4074                    [other_file_id],
4075                    |row| { row.get::<_, i64>(0) }
4076                )
4077                .unwrap(),
4078            1
4079        );
4080        assert_eq!(
4081            db.storage
4082                .connection()
4083                .query_row(
4084                    "SELECT COUNT(*) FROM main.symbols WHERE file_id = ?1",
4085                    [other_file_id],
4086                    |row| { row.get::<_, i64>(0) }
4087                )
4088                .unwrap(),
4089            1
4090        );
4091        assert_eq!(
4092            db.storage
4093                .connection()
4094                .query_row(
4095                    "SELECT COUNT(*) FROM main.chunk_fts WHERE rowid = ?1",
4096                    [other_chunk_id],
4097                    |row| { row.get::<_, i64>(0) }
4098                )
4099                .unwrap(),
4100            1
4101        );
4102
4103        fs::remove_dir_all(root).unwrap();
4104    }
4105
4106    #[test]
4107    fn compatible_open_refuses_dirty_and_newer_schema() {
4108        let root = unique_temp_root();
4109        let _ = fs::remove_dir_all(&root);
4110        fs::create_dir_all(root.join(".rag-rat")).unwrap();
4111        let database = root.join(".rag-rat/index.sqlite");
4112        let conn = rusqlite::Connection::open(&database).unwrap();
4113        conn.execute_batch(
4114            "
4115            CREATE TABLE schema_version(
4116                id TEXT PRIMARY KEY,
4117                applied_at_ms INTEGER NOT NULL,
4118                checksum TEXT NOT NULL,
4119                description TEXT NOT NULL
4120            );
4121            INSERT INTO schema_version(id, applied_at_ms, checksum, description)
4122            VALUES ('__dirty__', 1, '', 'partial migration in progress');
4123            ",
4124        )
4125        .unwrap();
4126        drop(conn);
4127
4128        let dirty = IndexDatabase::migration_check(&database).unwrap();
4129        assert_eq!(dirty.state, schema::SchemaState::Dirty);
4130        let err = IndexDatabase::open(&database).unwrap_err().to_string();
4131        assert!(err.contains("dirty or partial"), "{err}");
4132
4133        let conn = rusqlite::Connection::open(&database).unwrap();
4134        conn.execute_batch(
4135            "
4136            DELETE FROM schema_version;
4137            INSERT INTO schema_version(id, applied_at_ms, checksum, description)
4138            VALUES ('999_future_schema', 1, 'sha256:future', 'future schema');
4139            ",
4140        )
4141        .unwrap();
4142        drop(conn);
4143        let newer = IndexDatabase::migration_check(&database).unwrap();
4144        assert_eq!(newer.state, schema::SchemaState::Newer);
4145        let err = IndexDatabase::open(&database).unwrap_err().to_string();
4146        assert!(err.contains("newer rag-rat"), "{err}");
4147
4148        fs::remove_dir_all(root).unwrap();
4149    }
4150
4151    #[test]
4152    fn discover_mode_indexes_new_files_and_removes_deleted_files() {
4153        let root = unique_temp_root();
4154        let _ = fs::remove_dir_all(&root);
4155        fs::create_dir_all(root.join("src")).unwrap();
4156        fs::write(root.join("src/lib.rs"), "pub fn old_symbol() {}\n").unwrap();
4157        let config = source_config(root.clone(), Language::Rust);
4158        let db = IndexDatabase::rebuild(&config).unwrap();
4159        assert_eq!(db.discovery_status(&config).unwrap().unindexed_source_files, 0);
4160
4161        fs::write(root.join("src/new.rs"), "pub fn new_symbol() {}\n").unwrap();
4162        fs::remove_file(root.join("src/lib.rs")).unwrap();
4163        let drift = db.discovery_status(&config).unwrap();
4164        assert_eq!(drift.unindexed_source_files, 1);
4165        assert_eq!(drift.removed_indexed_files, 1);
4166        assert!(drift.warning.as_deref().unwrap().contains("rag-rat index --discover"));
4167
4168        let db = IndexDatabase::index_discover(&config).unwrap();
4169        let fresh = db.discovery_status(&config).unwrap();
4170        assert_eq!(fresh.unindexed_source_files, 0);
4171        assert_eq!(fresh.removed_indexed_files, 0);
4172        assert!(fresh.warning.is_none());
4173        assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4174        assert!(db.symbols("old_symbol", Some(Language::Rust), 10).unwrap().is_empty());
4175
4176        let mut events = Vec::new();
4177        let db = IndexDatabase::index_discover_with_progress(&config, |progress| {
4178            events.push(progress);
4179        })
4180        .unwrap();
4181        assert!(matches!(events.last(), Some(IndexProgress::Finished { files: 0 })));
4182        assert!(
4183            !events.iter().any(|event| matches!(
4184                event,
4185                IndexProgress::PreparingFile { .. } | IndexProgress::IndexingFile { .. }
4186            )),
4187            "no-op discover should not prepare or index files: {events:?}"
4188        );
4189        assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4190
4191        fs::remove_dir_all(root).unwrap();
4192    }
4193
4194    #[cfg(unix)]
4195    #[test]
4196    fn indexing_skips_symlink_loops() {
4197        let root = unique_temp_root();
4198        let _ = fs::remove_dir_all(&root);
4199        fs::create_dir_all(root.join("src")).unwrap();
4200        fs::write(root.join("src/lib.rs"), "pub fn loop_safe_symbol() {}\n").unwrap();
4201        std::os::unix::fs::symlink(&root, root.join("src/loop")).unwrap();
4202
4203        let config = source_config(root.clone(), Language::Rust);
4204        let db = IndexDatabase::rebuild(&config).unwrap();
4205
4206        assert_eq!(db.symbols("loop_safe_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4207
4208        fs::remove_dir_all(root).unwrap();
4209    }
4210
4211    #[test]
4212    fn dirty_git_files_are_indexed_as_worktree_overlay() {
4213        let root = unique_temp_root();
4214        let _ = fs::remove_dir_all(&root);
4215        let docs = root.join("docs");
4216        fs::create_dir_all(&docs).unwrap();
4217        fs::write(docs.join("search.md"), "# Title\nbase token\n").unwrap();
4218        run_git(&root, &["init"]);
4219        run_git(&root, &["add", "."]);
4220        run_git(
4221            &root,
4222            &[
4223                "-c",
4224                "user.name=Rag Rat Test",
4225                "-c",
4226                "user.email=rag-rat@example.invalid",
4227                "commit",
4228                "-m",
4229                "initial",
4230            ],
4231        );
4232
4233        let config = markdown_config_for_root(root.clone());
4234        let db = IndexDatabase::rebuild(&config).unwrap();
4235        assert_eq!(db.search("base", 10, false).unwrap().len(), 1);
4236
4237        fs::write(docs.join("search.md"), "# Title\noverlay token\n").unwrap();
4238        let db = IndexDatabase::index_changed(&config).unwrap();
4239        let scopes = db
4240            .storage
4241            .connection()
4242            .prepare(
4243                "
4244                SELECT commit_sha != '', worktree_id != ''
4245                FROM main.files
4246                WHERE path = 'docs/search.md'
4247                ORDER BY commit_sha != '' DESC, worktree_id != '' DESC
4248                ",
4249            )
4250            .unwrap()
4251            .query_map([], |row| Ok((row.get::<_, bool>(0)?, row.get::<_, bool>(1)?)))
4252            .unwrap()
4253            .collect::<Result<Vec<_>, _>>()
4254            .unwrap();
4255
4256        assert_eq!(scopes, vec![(true, false), (false, true)]);
4257        assert!(db.search("base", 10, false).unwrap().is_empty());
4258        let overlay_hits = db.search("overlay", 10, false).unwrap();
4259        assert_eq!(overlay_hits.len(), 1);
4260        assert!(overlay_hits[0].summary.contains("overlay token"));
4261
4262        fs::remove_dir_all(root).unwrap();
4263    }
4264
4265    #[test]
4266    fn rebuild_populates_revision_metadata_and_fresh_fts_state() {
4267        let (root, config) = markdown_config("alpha token");
4268        let db = IndexDatabase::rebuild(&config).unwrap();
4269        let status = db.status(&config.database).unwrap();
4270
4271        assert!(!status.content_revision.is_empty());
4272        assert_eq!(status.fts_source_revision.as_deref(), Some(status.content_revision.as_str()));
4273        assert_eq!(
4274            db.meta("content_revision").unwrap().as_deref(),
4275            Some(status.content_revision.as_str())
4276        );
4277        assert!(!status.fts_dirty);
4278        assert!(status.fts_fresh);
4279        assert!(!status.git_history.available);
4280        assert_eq!(status.git_history.commit_count, 0);
4281        assert_eq!(status.local_ai.embedding.state, "MissingModel");
4282        assert_eq!(status.local_ai.fastembed.backend, "fastembed");
4283        assert_eq!(status.local_ai.fastembed.model, ai::FASTEMBED_DISPLAY_MODEL);
4284        assert_eq!(status.local_ai.fastembed.dim, ai::FASTEMBED_EMBEDDING_DIM);
4285        assert!(!status.local_ai.fastembed.cache.is_empty());
4286        assert_eq!(status.local_ai.fastembed.build_feature_enabled, cfg!(feature = "fastembed"));
4287        assert_eq!(status.local_ai.artifacts.total_chunks, 1);
4288        assert_eq!(
4289            status.local_ai.artifacts.eligible_chunks + status.local_ai.artifacts.skipped_chunks,
4290            status.local_ai.artifacts.total_chunks
4291        );
4292        assert_eq!(
4293            status.local_ai.fastembed.eligible_embeddings
4294                + status.local_ai.fastembed.skipped_embeddings,
4295            status.local_ai.artifacts.total_chunks
4296        );
4297        assert_eq!(indexed_revision_count(&db), 1);
4298        assert_eq!(chunk_source_revision_count(&db), 1);
4299
4300        fs::remove_dir_all(root).unwrap();
4301    }
4302
4303    #[cfg(not(feature = "fastembed"))]
4304    #[test]
4305    fn fastembed_missing_feature_reports_rebuild_command() {
4306        let (root, config) = markdown_config("alpha token\n");
4307        let db = IndexDatabase::rebuild(&config).unwrap();
4308
4309        let err = db.install_model(ai::FASTEMBED_MODEL_ID).unwrap_err();
4310        assert!(err.to_string().contains(ai::FASTEMBED_MISSING_FEATURE_MESSAGE));
4311
4312        let status = db.local_ai_status().unwrap();
4313        assert!(!status.fastembed.build_feature_enabled);
4314        assert_eq!(status.fastembed.status, "MissingRuntime");
4315        assert_eq!(
4316            status.fastembed.message.as_deref(),
4317            Some(ai::FASTEMBED_MISSING_FEATURE_MESSAGE)
4318        );
4319        assert_eq!(status.fastembed.next.as_deref(), Some("cargo install rag-rat"));
4320
4321        fs::remove_dir_all(root).unwrap();
4322    }
4323
4324    #[test]
4325    fn reconcile_requires_explicit_model_install_and_ignores_stale_artifacts() {
4326        let (root, config) = markdown_config(
4327            "alpha token\nsecond line with enough detail for the semantic embedding policy to keep this chunk\nthird line with runtime context\n",
4328        );
4329        let db = IndexDatabase::rebuild(&config).unwrap();
4330        let chunk_id = first_chunk_id(&db);
4331
4332        let models = db.list_models().unwrap();
4333        let embedding = models.iter().find(|model| model.model_id == ai::HASH_MODEL_ID).unwrap();
4334        assert!(!embedding.installed);
4335        assert_eq!(embedding.status, "MissingModel");
4336
4337        let hits = db.search("alpha", 10, false).unwrap();
4338        assert_eq!(hits.len(), 1);
4339        assert!(hits[0].summary.contains("alpha token"));
4340
4341        let blocked = db.reconcile(Some(1), Some(8)).unwrap();
4342        assert_eq!(blocked.processed_chunks, 0);
4343        assert_eq!(blocked.embeddings_written, 0);
4344        assert_eq!(blocked.blocked_chunks, 0);
4345        assert_eq!(blocked.model_id, ai::HASH_MODEL_ID);
4346        assert_eq!(blocked.batch_size, 8);
4347        assert_eq!(blocked.status, "Blocked");
4348
4349        let status = db.local_ai_status().unwrap();
4350        assert_eq!(status.embedding.state, "MissingModel");
4351        assert_eq!(status.embedding.blocked_artifacts, 0);
4352
4353        db.install_model(ai::HASH_MODEL_ID).unwrap();
4354        let plan = db.reconcile_plan().unwrap();
4355        assert_eq!(plan.embeddings.missing, 1);
4356        assert_eq!(plan.embeddings.current, 0);
4357        let current = db.reconcile(Some(1), Some(8)).unwrap();
4358        assert_eq!(current.embeddings_written, 1);
4359        assert_eq!(current.model_id, ai::HASH_MODEL_ID);
4360        assert_eq!(current.model_version, "hash-v1");
4361        assert_eq!(current.embedding_dim, ai::HASH_EMBEDDING_DIM);
4362        assert_eq!(current.status, "Current");
4363        assert_eq!(current.work_reasons.get("Missing"), Some(&1));
4364        let noop = db.reconcile(None, Some(8)).unwrap();
4365        assert_eq!(noop.processed_chunks, 0);
4366        assert_eq!(noop.embeddings_written, 0);
4367        let status = db.local_ai_status().unwrap();
4368        assert_eq!(status.embedding.state, "Ready");
4369        assert_eq!(status.embedding.current_artifacts, 1);
4370        let embedding_bytes: i64 = db
4371            .storage
4372            .connection()
4373            .query_row(
4374                "SELECT length(vector_blob) FROM chunk_embeddings WHERE chunk_id = ?1 AND status = 'Current'",
4375                [chunk_id],
4376                |row| row.get(0),
4377            )
4378            .unwrap();
4379        assert_eq!(embedding_bytes, (ai::HASH_EMBEDDING_DIM * 4) as i64);
4380
4381        let hits = db.search("alpha", 10, false).unwrap();
4382        assert!(hits[0].summary.contains("alpha token"));
4383
4384        db.storage.connection().execute("DELETE FROM chunk_fts", []).unwrap();
4385        let vector_hits = db.search("alpha", 10, false).unwrap();
4386        assert_eq!(vector_hits.len(), 1);
4387        assert_eq!(vector_hits[0].chunk_id, chunk_id);
4388
4389        db.storage
4390            .connection()
4391            .execute(
4392                "UPDATE chunk_embeddings SET source_text_hash = 'old-hash' WHERE chunk_id = ?1",
4393                [chunk_id],
4394            )
4395            .unwrap();
4396        let plan = db.reconcile_plan().unwrap();
4397        assert_eq!(plan.embeddings.current, 0);
4398        assert_eq!(plan.embeddings.stale, 1);
4399        let refreshed = db.reconcile(None, Some(8)).unwrap();
4400        assert_eq!(refreshed.processed_chunks, 1);
4401        assert_eq!(refreshed.work_reasons.get("SourceChanged"), Some(&1));
4402        assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 1);
4403        let stale_embedding_hits = db.search("alpha", 10, false).unwrap();
4404        assert_eq!(stale_embedding_hits.len(), 1);
4405
4406        fs::remove_dir_all(root).unwrap();
4407    }
4408
4409    #[cfg(feature = "fastembed")]
4410    #[test]
4411    fn cached_fastembed_model_recovers_ready_state() {
4412        let (root, config) = markdown_config("alpha token\n");
4413        let db = IndexDatabase::rebuild(&config).unwrap();
4414        let cache_dir = root.join("models");
4415        let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4416        let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4417        fs::create_dir_all(repo.join("refs")).unwrap();
4418        fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4419        fs::write(repo.join("refs").join("main"), revision).unwrap();
4420
4421        ai::recover_cached_fastembed_model_at(db.storage.connection(), &cache_dir).unwrap();
4422
4423        let models = db.list_models().unwrap();
4424        let fastembed =
4425            models.iter().find(|model| model.model_id == ai::FASTEMBED_MODEL_ID).unwrap();
4426        assert!(fastembed.installed);
4427        assert_eq!(fastembed.status, "Ready");
4428        let status = db.local_ai_status().unwrap();
4429        assert_eq!(status.fastembed.status, "Ready");
4430        assert!(status.fastembed.active);
4431
4432        fs::remove_dir_all(root).unwrap();
4433    }
4434
4435    #[cfg(feature = "fastembed")]
4436    #[test]
4437    fn compatible_migrate_recovers_cached_fastembed_model() {
4438        let (root, config) = markdown_config("alpha token\n");
4439        let db = IndexDatabase::rebuild(&config).unwrap();
4440        let cache_dir = root.join("models");
4441        let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4442        let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4443        fs::create_dir_all(repo.join("refs")).unwrap();
4444        fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4445        fs::write(repo.join("refs").join("main"), revision).unwrap();
4446        db.storage
4447            .connection()
4448            .execute(
4449                "UPDATE ai_models
4450                 SET installed = 0, status = 'MissingModel', installed_at_ms = NULL
4451                 WHERE model_id = ?1",
4452                [ai::FASTEMBED_MODEL_ID],
4453            )
4454            .unwrap();
4455
4456        IndexDatabase::migrate_with_fastembed_cache(&config.database, Some(&cache_dir)).unwrap();
4457
4458        let db = IndexDatabase::open(&config.database).unwrap();
4459        let status = db.local_ai_status().unwrap();
4460        assert_eq!(status.fastembed.status, "Ready");
4461        assert!(status.fastembed.active);
4462
4463        fs::remove_dir_all(root).unwrap();
4464    }
4465
4466    #[test]
4467    fn reconcile_without_limit_processes_all_chunks() {
4468        let (root, config) = markdown_config(
4469            "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4470        );
4471        let db = IndexDatabase::rebuild(&config).unwrap();
4472        db.install_model(ai::HASH_MODEL_ID).unwrap();
4473
4474        let report = db.reconcile(None, Some(2)).unwrap();
4475
4476        assert_eq!(report.processed_chunks, 2);
4477        assert_eq!(report.embeddings_written, 2);
4478        assert_eq!(report.batch_size, 2);
4479        assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 2);
4480        let second = db.reconcile(None, Some(2)).unwrap();
4481        assert_eq!(second.processed_chunks, 0);
4482
4483        fs::remove_dir_all(root).unwrap();
4484    }
4485
4486    #[test]
4487    fn force_reconcile_processes_each_chunk_once_and_terminates() {
4488        // Regression: --force skipped the needs_embedding filter, so select_reconcile_batch
4489        // never returned an empty batch and the loop re-embedded the active set forever when
4490        // no --limit/--max-seconds was set. A generous finite limit lets this test terminate
4491        // either way; the processed/written counts distinguish fixed (==2) from buggy (==50).
4492        let (root, config) = markdown_config(
4493            "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4494        );
4495        let db = IndexDatabase::rebuild(&config).unwrap();
4496        db.install_model(ai::HASH_MODEL_ID).unwrap();
4497
4498        // Two eligible chunks; force with a limit far above the chunk count.
4499        let report = db.reconcile_with_progress(Some(50), Some(2), true, |_| {}).unwrap();
4500
4501        assert_eq!(report.embeddings_written, 2, "force re-embedded chunks: {report:?}");
4502        assert_eq!(report.processed_chunks, 2, "force re-processed chunks: {report:?}");
4503
4504        fs::remove_dir_all(root).unwrap();
4505    }
4506
4507    #[test]
4508    fn force_reconcile_progress_is_honest_and_terminates_without_limit() {
4509        let (root, config) = markdown_config(
4510            "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4511        );
4512        let db = IndexDatabase::rebuild(&config).unwrap();
4513        db.install_model(ai::HASH_MODEL_ID).unwrap();
4514
4515        // No --limit. max_seconds is only a safety net: if the force loop regressed to
4516        // re-embedding forever it would trip max_seconds and report "Partial" rather than
4517        // terminating naturally, which this test asserts against (no CI hang on regression).
4518        let mut events = Vec::new();
4519        let report = db
4520            .reconcile_with_options_progress(
4521                ai::ReconcileOptions {
4522                    force: true,
4523                    batch_size: Some(1),
4524                    max_seconds: Some(30),
4525                    ..ai::ReconcileOptions::default()
4526                },
4527                |event| events.push(event),
4528            )
4529            .unwrap();
4530
4531        assert_eq!(report.status, "Current", "did not terminate naturally: {report:?}");
4532        assert_eq!(report.processed_chunks, 2);
4533
4534        let started_total = events.iter().find_map(|event| match event {
4535            ai::ReconcileProgress::Started { total_chunks, .. } => Some(*total_chunks),
4536            _ => None,
4537        });
4538        assert_eq!(started_total, Some(2), "denominator should equal the eligible set");
4539
4540        for event in &events {
4541            if let ai::ReconcileProgress::Batch { processed_chunks, total_chunks, .. } = event {
4542                assert!(
4543                    processed_chunks <= total_chunks,
4544                    "progress exceeded 100%: {processed_chunks}/{total_chunks}",
4545                );
4546            }
4547        }
4548
4549        fs::remove_dir_all(root).unwrap();
4550    }
4551
4552    #[test]
4553    fn status_counts_only_active_context_chunks() {
4554        let (root, config) = markdown_config(
4555            "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4556        );
4557        let mut db = IndexDatabase::rebuild(&config).unwrap();
4558        db.install_model(ai::HASH_MODEL_ID).unwrap();
4559
4560        let active = db.local_ai_status().unwrap().artifacts.total_chunks;
4561        assert!(active > 0, "expected active chunks, got {active}");
4562
4563        // Point the connection at a context that matches no indexed rows. The active set
4564        // (temp.files) is now empty, so status must report 0 chunks. Pre-fix the counts ran
4565        // over main.chunks (every indexed commit) and ignored the active context entirely.
4566        db.set_context("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef", "ghost-worktree").unwrap();
4567        let scoped = db.local_ai_status().unwrap().artifacts;
4568        assert_eq!(scoped.total_chunks, 0, "status ignored active context scope");
4569        assert_eq!(scoped.current, 0);
4570
4571        fs::remove_dir_all(root).unwrap();
4572    }
4573
4574    #[test]
4575    fn watch_maintenance_pass_indexes_new_files() {
4576        // A watcher pass must pick up a brand-new (uncommitted) file, not just refresh known ones.
4577        let root = unique_temp_root();
4578        let _ = fs::remove_dir_all(&root);
4579        fs::create_dir_all(root.join("src")).unwrap();
4580        fs::write(root.join("src/one.rs"), "pub fn one() {}\n").unwrap();
4581        let config = source_config(root.clone(), Language::Rust);
4582        IndexDatabase::rebuild(&config).unwrap();
4583
4584        // New file appears after the initial index; a maintenance pass should index it.
4585        fs::write(root.join("src/two.rs"), "pub fn newly_added_symbol() {}\n").unwrap();
4586        crate::watch::maintenance_pass(&config, false).unwrap();
4587
4588        let db = IndexDatabase::open_config(&config).unwrap();
4589        let hits = db.symbols("newly_added_symbol", Some(Language::Rust), 10).unwrap();
4590        assert!(!hits.is_empty(), "watcher pass did not index the new file");
4591
4592        fs::remove_dir_all(root).unwrap();
4593    }
4594
4595    #[test]
4596    fn discover_deletion_is_worktree_scoped() {
4597        // Invariant (watcher spec, review item 1): a discover pass run from worktree A must remove
4598        // only A's own rows for files missing from A's disk — never another worktree's overlay
4599        // rows. Otherwise two watchers on one shared DB delete each other's live overlays.
4600        let root = unique_temp_root();
4601        let _ = fs::remove_dir_all(&root);
4602        fs::create_dir_all(root.join("src")).unwrap();
4603        fs::write(root.join("src/a.rs"), "pub fn a() {}\n").unwrap();
4604        fs::write(root.join("src/b.rs"), "pub fn b() {}\n").unwrap();
4605        let config = source_config(root.clone(), Language::Rust);
4606        let db = IndexDatabase::rebuild(&config).unwrap();
4607
4608        // A row owned by a *different* worktree, for a path that does not exist on this disk.
4609        db.storage
4610            .connection()
4611            .execute(
4612                "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated,
4613                     indexed_at_ms, indexed_revision, commit_sha, worktree_id)
4614                 VALUES ('src/only_in_other.rs','rust','source','h',0,0,0,'rev','',
4615                     'other-worktree')",
4616                [],
4617            )
4618            .unwrap();
4619        drop(db);
4620
4621        // This worktree loses a.rs; re-discover as this worktree.
4622        fs::remove_file(root.join("src/a.rs")).unwrap();
4623        let db = IndexDatabase::index_discover(&config).unwrap();
4624        let conn = db.storage.connection();
4625
4626        // The other worktree's overlay row survives untouched.
4627        let other: i64 = conn
4628            .query_row(
4629                "SELECT COUNT(*) FROM main.files WHERE worktree_id = 'other-worktree' \
4630                 AND kind != 'deleted'",
4631                [],
4632                |row| row.get(0),
4633            )
4634            .unwrap();
4635        assert_eq!(other, 1, "this worktree's pass deleted another worktree's row");
4636
4637        // Deletion still works within this worktree's own scope: a.rs gone from the active view,
4638        // b.rs retained.
4639        let active = |path: &str| -> i64 {
4640            conn.query_row("SELECT COUNT(*) FROM files WHERE path = ?1", [path], |row| row.get(0))
4641                .unwrap()
4642        };
4643        assert_eq!(active("src/a.rs"), 0, "deleted file still active in own worktree");
4644        assert_eq!(active("src/b.rs"), 1, "live file dropped from own worktree");
4645
4646        fs::remove_dir_all(root).unwrap();
4647    }
4648
4649    #[test]
4650    fn gc_prunes_dead_context_rows_and_keeps_live_ones() {
4651        let (root, config) = markdown_config(
4652            "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4653        );
4654        let db = IndexDatabase::rebuild(&config).unwrap();
4655        db.install_model(ai::HASH_MODEL_ID).unwrap();
4656        db.reconcile(None, Some(8)).unwrap();
4657
4658        let live_files = table_row_count(db.storage.connection(), "files").unwrap();
4659        let live_chunks = table_row_count(db.storage.connection(), "chunks").unwrap();
4660        assert!(live_files > 0 && live_chunks > 0);
4661
4662        // A ghost file from a commit/worktree that is not live.
4663        db.storage
4664            .connection()
4665            .execute(
4666                "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated,
4667                     indexed_at_ms, indexed_revision, commit_sha, worktree_id)
4668                 VALUES ('ghost.md','markdown','source','deadhash',0,0,0,'deadrev',
4669                     'deadcommit','dead-worktree')",
4670                [],
4671            )
4672            .unwrap();
4673        assert_eq!(table_row_count(db.storage.connection(), "files").unwrap(), live_files + 1);
4674
4675        // Keep only the active worktree. The ghost's commit and worktree are not live.
4676        let live_worktree = db.active_worktree_id.clone();
4677        let report = db.prune_to_live(&[], &[live_worktree]).unwrap();
4678
4679        assert!(!report.skipped);
4680        assert_eq!(report.files_pruned, 1, "ghost not pruned: {report:?}");
4681        assert_eq!(
4682            table_row_count(db.storage.connection(), "files").unwrap(),
4683            live_files,
4684            "live files were pruned",
4685        );
4686        assert_eq!(
4687            table_row_count(db.storage.connection(), "chunks").unwrap(),
4688            live_chunks,
4689            "live chunks were pruned",
4690        );
4691
4692        fs::remove_dir_all(root).unwrap();
4693    }
4694
4695    #[test]
4696    fn gc_refuses_to_prune_with_no_live_context() {
4697        let (root, config) =
4698            markdown_config("# Only\nsome content with enough detail for a chunk\n");
4699        let db = IndexDatabase::rebuild(&config).unwrap();
4700        let before = table_row_count(db.storage.connection(), "files").unwrap();
4701        assert!(before > 0);
4702
4703        // Empty live sets must never wipe the index.
4704        let report = db.prune_to_live(&[], &[]).unwrap();
4705        assert!(report.skipped);
4706        assert_eq!(report.files_pruned, 0);
4707        assert_eq!(table_row_count(db.storage.connection(), "files").unwrap(), before);
4708
4709        fs::remove_dir_all(root).unwrap();
4710    }
4711
4712    #[test]
4713    fn reconcile_treats_c_chunks_as_embedding_eligible() {
4714        let root = unique_temp_root();
4715        let _ = fs::remove_dir_all(&root);
4716        fs::create_dir_all(root.join("src")).unwrap();
4717        fs::write(
4718            root.join("src/main.c"),
4719            r#"
4720static int read_sensor_value(int baseline)
4721{
4722    int adjusted = baseline + 42;
4723    return adjusted;
4724}
4725
4726int main(void)
4727{
4728    int sample = read_sensor_value(7);
4729    return sample == 49 ? 0 : 1;
4730}
4731"#,
4732        )
4733        .unwrap();
4734        let config = source_config(root.clone(), Language::C);
4735        let db = IndexDatabase::rebuild(&config).unwrap();
4736        db.install_model(ai::HASH_MODEL_ID).unwrap();
4737
4738        let plan = db.reconcile_plan().unwrap();
4739
4740        assert_eq!(plan.embeddings.skipped_by_policy.get("SkipLanguageUnsupported"), None);
4741        assert!(plan.embeddings.missing > 0, "plan: {:?}", plan.embeddings);
4742
4743        let report = db.reconcile(None, Some(8)).unwrap();
4744        assert!(report.embeddings_written > 0, "report: {report:?}");
4745
4746        fs::remove_dir_all(root).unwrap();
4747    }
4748
4749    #[test]
4750    fn reconcile_policy_skips_tiny_chunks_before_embedding() {
4751        let (root, config) = markdown_config("tiny\n");
4752        let db = IndexDatabase::rebuild(&config).unwrap();
4753        db.install_model(ai::HASH_MODEL_ID).unwrap();
4754
4755        let plan = db.reconcile_plan().unwrap();
4756        assert_eq!(plan.embeddings.missing, 0);
4757        assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4758
4759        let report = db.reconcile(None, Some(8)).unwrap();
4760        assert_eq!(report.embeddings_written, 0);
4761        assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4762        assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 0);
4763
4764        fs::remove_dir_all(root).unwrap();
4765    }
4766
4767    #[test]
4768    fn reconcile_plan_reports_policy_skips_for_fastembed_model() {
4769        let (root, config) = markdown_config("tiny\n");
4770        let db = IndexDatabase::rebuild(&config).unwrap();
4771        db.storage
4772            .connection()
4773            .execute(
4774                "UPDATE ai_models
4775                 SET installed = 1, disabled = 0, status = 'Ready', embedding_dim = ?2
4776                 WHERE model_id = ?1",
4777                params![
4778                    ai::FASTEMBED_MODEL_ID,
4779                    i64::try_from(ai::FASTEMBED_EMBEDDING_DIM).unwrap()
4780                ],
4781            )
4782            .unwrap();
4783        db.storage
4784            .connection()
4785            .execute(
4786                "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4787                 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4788                [ai::FASTEMBED_MODEL_ID],
4789            )
4790            .unwrap();
4791
4792        let plan = db.reconcile_plan().unwrap();
4793
4794        assert_eq!(plan.embeddings.model_id, ai::FASTEMBED_MODEL_ID);
4795        assert_eq!(plan.embeddings.missing, 0);
4796        assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4797
4798        fs::remove_dir_all(root).unwrap();
4799    }
4800
4801    #[cfg(not(feature = "fastembed"))]
4802    #[test]
4803    fn blocked_fastembed_reconcile_still_reports_policy_skips() {
4804        let (root, config) = markdown_config("tiny\n");
4805        let db = IndexDatabase::rebuild(&config).unwrap();
4806        db.storage
4807            .connection()
4808            .execute(
4809                "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4810                 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4811                [ai::FASTEMBED_MODEL_ID],
4812            )
4813            .unwrap();
4814
4815        let report = db.reconcile(None, Some(8)).unwrap();
4816
4817        assert_eq!(report.status, "Blocked");
4818        assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4819
4820        fs::remove_dir_all(root).unwrap();
4821    }
4822
4823    #[test]
4824    fn search_explain_reports_weighted_score_components() {
4825        let (root, config) = markdown_config(
4826            "alpha runtime shutdown\nsecond line with enough detail for embedding eligibility and semantic vector scoring\nthird line\n",
4827        );
4828        let db = IndexDatabase::rebuild(&config).unwrap();
4829        db.install_model(ai::HASH_MODEL_ID).unwrap();
4830        db.reconcile(None, Some(8)).unwrap();
4831
4832        let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4833
4834        assert_eq!(hits.len(), 1);
4835        let components = hits[0].score_components.as_ref().unwrap();
4836        let component_sum = components.bm25
4837            + components.vector
4838            + components.symbol
4839            + components.graph
4840            + components.git
4841            + components.github;
4842        // `score` is rounded to 4dp for display, so compare against the rounded component sum.
4843        assert!((hits[0].score - crate::query::round_score(component_sum)).abs() < 1e-9);
4844        assert!(components.bm25 > 0.0);
4845        assert!(components.vector > 0.0);
4846        assert!(components.vector_note.is_none());
4847        assert!(components.bm25 <= 0.45);
4848        assert!(components.vector <= 0.35);
4849        assert!(components.symbol <= 0.10);
4850        assert!(components.graph <= 0.05);
4851        assert!(components.git <= 0.03);
4852        assert!(components.github <= 0.02);
4853        assert!(db.search("runtime shutdown", 10, false).unwrap()[0].score_components.is_none());
4854
4855        fs::remove_dir_all(root).unwrap();
4856    }
4857
4858    #[test]
4859    fn search_explain_labels_missing_vector_runtime() {
4860        let (root, config) = markdown_config(
4861            "alpha runtime shutdown\nsecond line with enough detail for lexical search without embeddings\nthird line\n",
4862        );
4863        let db = IndexDatabase::rebuild(&config).unwrap();
4864
4865        let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4866
4867        assert_eq!(hits.len(), 1);
4868        let components = hits[0].score_components.as_ref().unwrap();
4869        assert!(components.bm25 > 0.0);
4870        assert_eq!(components.vector, 0.0);
4871        assert_eq!(
4872            components.vector_note.as_deref(),
4873            Some("vector search unavailable: no current embedding model")
4874        );
4875
4876        fs::remove_dir_all(root).unwrap();
4877    }
4878
4879    #[test]
4880    fn git_history_indexes_commits_paths_queries_and_blame() {
4881        let root = unique_temp_root();
4882        let _ = fs::remove_dir_all(&root);
4883        fs::create_dir_all(root.join("docs")).unwrap();
4884        fs::create_dir_all(root.join("src")).unwrap();
4885        run_git(&root, &["init"]);
4886        run_git(&root, &["config", "user.name", "Rag Rat"]);
4887        run_git(&root, &["config", "user.email", "rag@example.com"]);
4888
4889        fs::write(root.join("docs/search.md"), "# Title\nalpha token\n").unwrap();
4890        fs::write(root.join("src/lib.rs"), "pub fn tracked_symbol() {}\n").unwrap();
4891        run_git(&root, &["add", "."]);
4892        run_git(&root, &["commit", "-m", "Add alpha docs"]);
4893
4894        fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
4895        run_git(&root, &["add", "."]);
4896        run_git(&root, &["commit", "-m", "Refresh beta docs"]);
4897
4898        let config = Config {
4899            root: root.clone(),
4900            database: root.join(".rag-rat/index.sqlite"),
4901            targets: vec![
4902                ResolvedTarget {
4903                    name: "markdown".to_string(),
4904                    language: Language::Markdown,
4905                    directories: vec![PathBuf::from("docs")],
4906                    include: vec!["**/*.md".to_string()],
4907                    exclude: Vec::new(),
4908                    kind: TargetKind::Docs,
4909                },
4910                ResolvedTarget {
4911                    name: "rust".to_string(),
4912                    language: Language::Rust,
4913                    directories: vec![PathBuf::from("src")],
4914                    include: vec!["**/*.rs".to_string()],
4915                    exclude: Vec::new(),
4916                    kind: TargetKind::Source,
4917                },
4918            ],
4919            local_ai: Default::default(),
4920            watch: Default::default(),
4921        };
4922        let db = IndexDatabase::rebuild(&config).unwrap();
4923        let status = db.status(&config.database).unwrap();
4924        assert!(status.git_history.available);
4925        assert!(status.git_history.head.is_some());
4926        assert_eq!(status.git_history.indexed_head, status.git_history.head);
4927        assert_eq!(status.git_history.commit_count, 2);
4928        assert_eq!(status.git_history.file_change_count, 3);
4929
4930        let commit_hits = db.commit_search("beta", 10).unwrap();
4931        assert_eq!(commit_hits.len(), 1);
4932        assert_eq!(commit_hits[0].subject, "Refresh beta docs");
4933        assert_eq!(commit_hits[0].evidence_kind, "historical");
4934        assert!(commit_hits[0].score > 0.0);
4935
4936        let path_history = db.git_history_for_path("docs/search.md", 10).unwrap();
4937        assert_eq!(path_history.len(), 2);
4938        assert!(path_history.iter().all(|item| item.evidence_kind == "historical"));
4939
4940        let symbol_history =
4941            db.git_history_for_symbol("tracked_symbol", Some(Language::Rust), 10).unwrap();
4942        assert_eq!(symbol_history.len(), 1);
4943        assert_eq!(symbol_history[0].path, "src/lib.rs");
4944        assert_eq!(symbol_history[0].evidence_kind, "historical");
4945        let impact = db.impact_surface("tracked_symbol", 10).unwrap();
4946        assert!(impact.iter().any(|item| {
4947            item.category == "Direct structural impact" && item.reason == "exact_symbol_definition"
4948        }));
4949        assert!(impact.iter().any(|item| {
4950            item.category == "Historical/papertrail evidence"
4951                && item.reason == "git_commit_touched_file"
4952        }));
4953
4954        let query_commits = db.commits_touching_query("beta", 10).unwrap();
4955        let beta_commit =
4956            query_commits.iter().find(|hit| hit.subject == "Refresh beta docs").unwrap();
4957        assert!(beta_commit.evidence.iter().any(|value| value == "commit_message"));
4958        assert!(beta_commit.evidence.iter().any(|value| value == "file_change"));
4959        assert_eq!(beta_commit.evidence_kind, "historical");
4960
4961        let chunk_id = first_chunk_id(&db);
4962        let blame = db.git_blame_chunk(chunk_id).unwrap().unwrap();
4963        assert_eq!(blame.source_text_hash, hex_sha256("# Title\nbeta token\n".as_bytes()));
4964        assert_eq!(blame.line_count, 2);
4965        assert_eq!(blame.commit_counts.values().sum::<i64>(), 2);
4966        assert!(blame.dominant_commit_lines >= 1);
4967        assert!(blame.dominant_commit.is_some());
4968        assert_eq!(blame.evidence_kind, "historical");
4969        let cached = db.git_blame_chunk(chunk_id).unwrap().unwrap();
4970        assert_eq!(cached.source_text_hash, blame.source_text_hash);
4971
4972        fs::remove_dir_all(root).unwrap();
4973    }
4974
4975    #[test]
4976    fn indexes_rust_graph_edges_from_tree_sitter() {
4977        let root = unique_temp_root();
4978        let _ = fs::remove_dir_all(&root);
4979        fs::create_dir_all(root.join("src")).unwrap();
4980        fs::write(
4981            root.join("src/lib.rs"),
4982            r#"
4983use crate::worker::Worker;
4984mod worker;
4985
4986trait Service {
4987    fn serve(&self);
4988}
4989
4990struct Worker;
4991
4992impl Service for Worker {
4993    fn serve(&self) {
4994        helper();
4995    }
4996}
4997
4998fn helper() {}
4999
5000fn caller() {
5001    helper();
5002    Worker.serve();
5003}
5004"#,
5005        )
5006        .unwrap();
5007        let config = source_config(root.clone(), Language::Rust);
5008        let db = IndexDatabase::rebuild(&config).unwrap();
5009
5010        assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
5011        assert_edge(&db, "Worker", "Service", "implements", "Syntactic");
5012        assert_edge(&db, "src/lib.rs", "worker", "imports", "Syntactic");
5013        let callers = db.find_callers("helper", 10).unwrap();
5014        assert!(
5015            callers.iter().any(|edge| {
5016                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
5017                    && edge.edge_kind == "calls_name"
5018            }),
5019            "helper callers: {callers:?}"
5020        );
5021
5022        fs::remove_dir_all(root).unwrap();
5023    }
5024
5025    #[test]
5026    fn ffi_surface_labels_exported_impl_members_separately() {
5027        let root = unique_temp_root();
5028        let _ = fs::remove_dir_all(&root);
5029        fs::create_dir_all(root.join("src")).unwrap();
5030        fs::write(
5031            root.join("src/lib.rs"),
5032            r#"
5033pub struct PhraseRepo;
5034
5035#[uniffi::export]
5036impl PhraseRepo {
5037    pub fn children(&self) {}
5038    pub fn journal(&self) {}
5039}
5040
5041#[cfg_attr(not(target_arch = "wasm32"), uniffi::export(async_runtime = "tokio"))]
5042impl Runtime {
5043    pub fn route_search_query(&self) {}
5044}
5045
5046pub struct Runtime;
5047
5048/// Not #[uniffi::export]: this is an internal helper.
5049pub fn internal_helper() {}
5050
5051#[cfg_attr(target_arch = "wasm32", ::uniffi::export)]
5052pub fn exported_fn() {}
5053"#,
5054        )
5055        .unwrap();
5056        let config = source_config(root.clone(), Language::Rust);
5057        let db = IndexDatabase::rebuild(&config).unwrap();
5058
5059        let surface = db.ffi_surface(20).unwrap();
5060        assert!(
5061            surface.iter().any(|item| {
5062                item.reason == "rust_uniffi_export"
5063                    && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("exported_fn"))
5064            }),
5065            "direct export should remain direct: {surface:?}"
5066        );
5067        assert!(
5068            surface.iter().any(|item| item.reason == "rust_uniffi_exported_impl"),
5069            "exported impl/type surface should be explicit: {surface:?}"
5070        );
5071        assert!(
5072            surface.iter().any(|item| {
5073                item.reason == "rust_uniffi_impl_member"
5074                    && item
5075                        .symbol
5076                        .as_deref()
5077                        .is_some_and(|symbol| symbol.ends_with("route_search_query"))
5078            }),
5079            "cfg_attr exported impl member should be labeled separately: {surface:?}"
5080        );
5081        assert!(
5082            surface.iter().any(|item| {
5083                item.reason == "rust_uniffi_impl_member"
5084                    && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("children"))
5085            }),
5086            "impl member should be labeled separately: {surface:?}"
5087        );
5088        assert!(
5089            !surface.iter().any(|item| {
5090                item.reason == "rust_uniffi_export"
5091                    && item.symbol.as_deref().is_some_and(|symbol| {
5092                        symbol.ends_with("children") || symbol.ends_with("journal")
5093                    })
5094            }),
5095            "impl members must not be reported as direct exports: {surface:?}"
5096        );
5097        assert!(
5098            !surface.iter().any(|item| {
5099                item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("internal_helper"))
5100            }),
5101            "comment-only UniFFI mentions must not create FFI surface rows: {surface:?}"
5102        );
5103
5104        fs::remove_dir_all(root).unwrap();
5105    }
5106
5107    #[test]
5108    fn find_callers_sees_calls_in_let_bindings() {
5109        // Regression for issue #47: calls in `let x = f();` and `let-else` initializers produced
5110        // no caller edge, so find_callers reported 0 callers for a function that is called.
5111        let root = unique_temp_root();
5112        let _ = fs::remove_dir_all(&root);
5113        fs::create_dir_all(root.join("src")).unwrap();
5114        fs::write(
5115            root.join("src/lib.rs"),
5116            "pub fn target() -> Option<i32> {\n    Some(1)\n}\n\n\
5117             pub fn via_statement() {\n    target();\n}\n\n\
5118             pub fn via_let() {\n    let _x = target();\n}\n\n\
5119             pub fn via_let_else() {\n    let Some(_x) = target() else {\n        return;\n    };\n}\n",
5120        )
5121        .unwrap();
5122        let config = source_config(root.clone(), Language::Rust);
5123        let db = IndexDatabase::rebuild(&config).unwrap();
5124
5125        let callers = db.find_callers("target", 50).unwrap();
5126        let names: Vec<String> = callers.iter().filter_map(|hop| hop.from_symbol.clone()).collect();
5127        let has = |suffix: &str| names.iter().any(|name| name.ends_with(suffix));
5128
5129        assert!(has("via_statement"), "missing plain-statement caller; got {names:?}");
5130        assert!(has("via_let"), "missing `let x = target()` caller; got {names:?}");
5131        assert!(has("via_let_else"), "missing `let-else` caller; got {names:?}");
5132
5133        fs::remove_dir_all(root).unwrap();
5134    }
5135
5136    #[test]
5137    fn search_and_read_chunk_attach_bounded_graph_evidence() {
5138        let root = unique_temp_root();
5139        let _ = fs::remove_dir_all(&root);
5140        fs::create_dir_all(root.join("src")).unwrap();
5141        fs::write(
5142            root.join("src/lib.rs"),
5143            "pub fn helper() {}\n\npub fn caller() {\n    helper();\n}\n",
5144        )
5145        .unwrap();
5146        let config = source_config(root.clone(), Language::Rust);
5147        let db = IndexDatabase::rebuild(&config).unwrap();
5148
5149        let hits = db.search("helper caller", 10, false).unwrap();
5150        let helper_hit = hits
5151            .iter()
5152            .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with("helper")))
5153            .expect("helper search hit");
5154        let helper_graph = helper_hit.graph.as_ref().expect("helper graph evidence");
5155        assert_eq!(helper_graph.caller_count, 1);
5156        assert!(helper_graph.top_callers.iter().any(|caller| {
5157            caller.symbol_path.ends_with("caller")
5158                && caller.callsite.line == 4
5159                && caller.callsite.span == [4, 4]
5160                && caller.confidence == "syntactic"
5161        }));
5162        assert!(helper_graph.callers.is_empty(), "search keeps graph compact");
5163
5164        let caller_hit = hits
5165            .iter()
5166            .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with("caller")))
5167            .expect("caller search hit");
5168        let caller_graph = caller_hit.graph.as_ref().expect("caller graph evidence");
5169        assert!(caller_graph.top_callees.iter().any(|callee| {
5170            callee.target == "helper"
5171                && callee.callsite.line == 4
5172                && callee.callsite.span == [4, 4]
5173                && callee.confidence == "syntactic"
5174        }));
5175
5176        let chunk = db.read_chunk(caller_hit.chunk_id).unwrap().expect("caller chunk");
5177        let full_graph = chunk.graph.as_ref().expect("full read_chunk graph");
5178        assert!(full_graph.symbol.as_ref().is_some_and(|symbol| symbol.name == "caller"));
5179        assert!(
5180            full_graph
5181                .callees
5182                .iter()
5183                .any(|callee| callee.target == "helper" && callee.callsite.line == 4)
5184        );
5185        assert!(full_graph.notes.iter().any(|note| note.contains("tree-sitter/syntactic")));
5186
5187        fs::remove_dir_all(root).unwrap();
5188    }
5189
5190    #[test]
5191    fn graph_exact_mode_requires_verified_symbol_identity() {
5192        let root = unique_temp_root();
5193        let _ = fs::remove_dir_all(&root);
5194        fs::create_dir_all(root.join("src")).unwrap();
5195        fs::write(
5196            root.join("src/lib.rs"),
5197            "pub fn helper() {}\n\npub fn caller() {\n    helper();\n}\n",
5198        )
5199        .unwrap();
5200        let config = source_config(root.clone(), Language::Rust);
5201        let db = IndexDatabase::rebuild(&config).unwrap();
5202        let helper = db.symbols("helper", Some(Language::Rust), 10).unwrap().remove(0);
5203        let caller = db.symbols("caller", Some(Language::Rust), 10).unwrap().remove(0);
5204
5205        let bare_exact = db
5206            .find_callers_with_options(
5207                "helper",
5208                10,
5209                &crate::query::graph::GraphTraversalOptions {
5210                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
5211                    ..Default::default()
5212                },
5213            )
5214            .unwrap();
5215        assert!(bare_exact.is_empty(), "bare exact lookup should not fall back: {bare_exact:?}");
5216
5217        let exact_callers = db
5218            .find_callers_with_options(
5219                "helper",
5220                10,
5221                &crate::query::graph::GraphTraversalOptions {
5222                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
5223                    symbol_id: Some(helper.symbol_id),
5224                    ..Default::default()
5225                },
5226            )
5227            .unwrap();
5228        assert!(
5229            exact_callers.iter().any(|edge| {
5230                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
5231                    && edge.verified_target_symbol
5232            }),
5233            "exact callers: {exact_callers:?}"
5234        );
5235        assert!(exact_callers.iter().all(|edge| edge.verified_target_symbol));
5236
5237        let exact_callees = db
5238            .trace_callees_with_options(
5239                "caller",
5240                10,
5241                &crate::query::graph::GraphTraversalOptions {
5242                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
5243                    symbol_id: Some(caller.symbol_id),
5244                    ..Default::default()
5245                },
5246            )
5247            .unwrap();
5248        assert!(
5249            exact_callees.iter().any(|edge| {
5250                edge.target.as_deref() == Some("helper") && edge.verified_target_symbol
5251            }),
5252            "exact callees: {exact_callees:?}"
5253        );
5254        assert!(exact_callees.iter().all(|edge| edge.verified_target_symbol));
5255
5256        fs::remove_dir_all(root).unwrap();
5257    }
5258
5259    #[test]
5260    fn symbol_lookup_ranks_type_definitions_before_impl_blocks() {
5261        let root = unique_temp_root();
5262        let _ = fs::remove_dir_all(&root);
5263        fs::create_dir_all(root.join("src")).unwrap();
5264        fs::write(
5265            root.join("src/lib.rs"),
5266            r#"
5267impl Database {
5268    pub fn open() -> Self {
5269        Database
5270    }
5271}
5272
5273pub struct Database;
5274"#,
5275        )
5276        .unwrap();
5277        let config = source_config(root.clone(), Language::Rust);
5278        let db = IndexDatabase::rebuild(&config).unwrap();
5279        let hits = db.symbols("Database", Some(Language::Rust), 10).unwrap();
5280        assert!(hits.len() >= 2, "fixture should expose both impl and struct symbols: {hits:?}");
5281        assert_eq!(hits[0].kind, "struct", "Database lookup should prefer type definition");
5282        assert!(
5283            hits.iter().any(|hit| hit.kind == "impl"),
5284            "impl Database should still be available after the struct: {hits:?}"
5285        );
5286
5287        fs::remove_dir_all(root).unwrap();
5288    }
5289
5290    #[test]
5291    fn distinct_same_named_methods_do_not_merge_and_logical_ids_are_stable() {
5292        // Two `new` on different impls share a `qualified_name` (`…lib.rs::new`) but differ in
5293        // signature — they must NOT collapse into one "cfg_variant" logical symbol. And the
5294        // logical id must be stable across a reindex (it is content-derived, not an autoincrement).
5295        let root = unique_temp_root();
5296        let _ = fs::remove_dir_all(&root);
5297        fs::create_dir_all(root.join("src")).unwrap();
5298        fs::write(
5299            root.join("src/lib.rs"),
5300            r#"
5301pub struct A;
5302pub struct B;
5303
5304impl A {
5305    pub fn new(name: String) -> Self { A }
5306}
5307
5308impl B {
5309    pub fn new(count: usize, flag: bool) -> Self { B }
5310}
5311"#,
5312        )
5313        .unwrap();
5314        let config = source_config(root.clone(), Language::Rust);
5315        let db = IndexDatabase::rebuild(&config).unwrap();
5316
5317        let selector = crate::query::symbol::SymbolSelector {
5318            logical_symbol_id: None,
5319            symbol_id: None,
5320            symbol_path: None,
5321            symbol: Some("new".to_string()),
5322            language: Some(Language::Rust),
5323            allow_ambiguous: true,
5324            limit: 10,
5325        };
5326        let lookup = db.symbol_candidates(&selector).unwrap();
5327        let new_candidates: Vec<_> =
5328            lookup.candidates.iter().filter(|candidate| candidate.name == "new").collect();
5329        assert_eq!(new_candidates.len(), 2, "both constructors present: {new_candidates:?}");
5330        let logical_ids: std::collections::BTreeSet<i64> =
5331            new_candidates.iter().filter_map(|candidate| candidate.logical_symbol_id).collect();
5332        assert_eq!(logical_ids.len(), 2, "distinct signatures get distinct logical ids");
5333        for candidate in &new_candidates {
5334            assert_eq!(
5335                candidate.logical_group_reason.as_deref(),
5336                Some("single"),
5337                "differently-signed methods are not cfg variants: {candidate:?}"
5338            );
5339        }
5340
5341        // Reindex and confirm the logical ids are unchanged (content-derived, not churned).
5342        let db = IndexDatabase::rebuild(&config).unwrap();
5343        let relookup = db.symbol_candidates(&selector).unwrap();
5344        let reindexed_ids: std::collections::BTreeSet<i64> = relookup
5345            .candidates
5346            .iter()
5347            .filter(|candidate| candidate.name == "new")
5348            .filter_map(|candidate| candidate.logical_symbol_id)
5349            .collect();
5350        assert_eq!(reindexed_ids, logical_ids, "logical ids must be stable across reindex");
5351
5352        fs::remove_dir_all(root).unwrap();
5353    }
5354
#[test]
fn logical_symbol_exact_mode_covers_duplicate_rust_variants() {
    // `spawn_blocking` is defined twice behind mutually exclusive `cfg` attributes. The lookup
    // must report the pair as one logical symbol (group reason `cfg_variant`), and exact-mode
    // caller queries pinned to either variant's `symbol_id` must still find `caller`.
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(
        root.join("src/lib.rs"),
        r#"
#[cfg(not(target_arch = "wasm32"))]
pub fn spawn_blocking() {}

#[cfg(target_arch = "wasm32")]
pub fn spawn_blocking() {}

pub fn caller() {
    spawn_blocking();
}
"#,
    )
    .unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();
    let lookup = db
        .symbol_candidates(&crate::query::symbol::SymbolSelector {
            logical_symbol_id: None,
            symbol_id: None,
            symbol_path: None,
            symbol: Some("spawn_blocking".to_string()),
            language: Some(Language::Rust),
            allow_ambiguous: true,
            limit: 10,
        })
        .unwrap();

    // The candidates are indexed positionally below; check the count first so a regression
    // reports the actual candidates instead of an opaque index-out-of-bounds panic.
    assert!(
        lookup.candidates.len() >= 2,
        "expected both cfg variants as candidates: {:?}",
        lookup.candidates
    );
    let first_variant = &lookup.candidates[0];
    let second_variant = &lookup.candidates[1];

    let logical_symbol_id = first_variant.logical_symbol_id.expect("logical id");
    assert_eq!(first_variant.logical_variant_count, Some(2));
    assert_eq!(first_variant.logical_group_reason.as_deref(), Some("cfg_variant"));

    // Exact lookup pinned to the second variant: `caller` must still be reported, and every
    // returned edge must be flagged as a verified target.
    let exact_variant_callers = db
        .find_callers_with_options(
            "spawn_blocking",
            10,
            &GraphTraversalOptions {
                resolution_mode: GraphResolutionMode::Exact,
                symbol_id: Some(second_variant.symbol_id),
                ..Default::default()
            },
        )
        .unwrap();
    let variant_lookup_finds_caller = exact_variant_callers.iter().any(|edge| {
        edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
            && edge.target.as_deref() == Some("spawn_blocking")
            && edge.verified_target_symbol
    });
    assert!(
        variant_lookup_finds_caller,
        "symbol_id exact should include its logical cfg group: {exact_variant_callers:?}"
    );
    assert!(exact_variant_callers.iter().all(|edge| edge.verified_target_symbol));

    // Full traversal report anchored on the first variant: it must carry the logical symbol id,
    // list both variants, and contain the `caller` edge.
    let exact_logical = db
        .graph_traversal_report(
            "find_callers",
            first_variant,
            true,
            10,
            &GraphTraversalOptions {
                resolution_mode: GraphResolutionMode::Exact,
                symbol_id: Some(first_variant.symbol_id),
                ..Default::default()
            },
        )
        .unwrap();
    assert_eq!(exact_logical.query.logical_symbol_id, Some(logical_symbol_id));
    assert_eq!(
        exact_logical.logical_symbol.as_ref().map(|symbol| symbol.variant_count),
        Some(2)
    );
    assert_eq!(exact_logical.variants.len(), 2);
    assert!(exact_logical.results.iter().all(|edge| edge.verified_target_symbol));
    let report_finds_caller = exact_logical.results.iter().any(|edge| {
        edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
            && edge.target.as_deref() == Some("spawn_blocking")
    });
    assert!(report_finds_caller, "logical exact callers: {exact_logical:?}");

    fs::remove_dir_all(root).unwrap();
}
5443
#[test]
fn indexes_real_world_rust_graph_patterns() {
    // Indexes the `graph-realworld/rust` fixture, checks the edges it must produce, and then
    // compares the plain `find_callers` lookup for `serve` with the fuzzy-resolution lookup.
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};

    let root = fixture_temp_root("graph-realworld/rust");
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Each row is forwarded to `assert_edge` unchanged, in this order.
    let expected_edges = [
        ("src/lib.rs", "worker", "imports", "Syntactic"),
        ("src/lib.rs", "Worker", "exports", "Syntactic"),
        ("entry", "new", "calls_name", "NameOnly"),
        ("entry", "Client", "references_type", "Syntactic"),
        ("drive", "serve", "calls_name", "NameOnly"),
        ("drive", "GenericRunner", "references_type", "Syntactic"),
        ("Worker", "Service", "implements", "Syntactic"),
        ("generic_call", "T", "references_type", "NameOnly"),
        ("entry", "generated_call", "uses_macro", "NameOnly"),
    ];
    for (first, second, third, fourth) in expected_edges {
        assert_edge(&db, first, second, third, fourth);
    }

    // Without explicit options the lookup must return nothing for `serve`.
    let syntactic_callers = db.find_callers("serve", 10).unwrap();
    assert!(
        syntactic_callers.is_empty(),
        "syntactic serve callers should avoid receiver/name fallback: {syntactic_callers:?}"
    );

    // With fuzzy resolution the `drive` -> `serve` name-only call must be reported.
    let fuzzy = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Fuzzy,
        ..Default::default()
    };
    let callers = db.find_callers_with_options("serve", 10, &fuzzy).unwrap();
    let drive_is_reported = callers.iter().any(|edge| {
        edge.edge_kind == "calls_name"
            && edge.edge_confidence == edge.confidence
            && edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("drive"))
    });
    assert!(drive_is_reported, "serve callers: {callers:?}");

    fs::remove_dir_all(root).unwrap();
}
5485
#[test]
fn indexes_typescript_graph_edges_from_tree_sitter() {
    // Writes a two-file TypeScript project (`helper.ts` and `App.tsx`), indexes it, and checks
    // the call, type-reference, import and export edges as well as the callees of `callRun`.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).unwrap();

    let helper_source = "export function helper() {}\nexport const Card = () => null;\n";
    fs::write(root.join("src/helper.ts"), helper_source).unwrap();

    let app_source = r#"
import { helper, Card } from "./helper";

export function run() {
  helper();
  return <Card />;
}

export const callRun = () => run();
"#;
    fs::write(root.join("src/App.tsx"), app_source).unwrap();

    let config = source_config(root.clone(), Language::TypeScript);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Each row is forwarded to `assert_edge` unchanged, in this order.
    let expected_edges = [
        ("run", "helper", "calls_name", "Syntactic"),
        ("run", "Card", "references_type", "Syntactic"),
        ("src/App.tsx", "helper", "imports", "Syntactic"),
        ("src/App.tsx", "run", "exports", "Syntactic"),
    ];
    for (first, second, third, fourth) in expected_edges {
        assert_edge(&db, first, second, third, fourth);
    }

    // `callRun` must list `run` as a callee with `syntactic` confidence.
    let callees = db.trace_callees("callRun", 10).unwrap();
    let run_is_a_callee = callees.iter().any(|edge| {
        edge.to_symbol.as_deref().is_some_and(|name| name.ends_with("run"))
            && edge.confidence == "syntactic"
    });
    assert!(run_is_a_callee, "callRun callees: {callees:?}");

    fs::remove_dir_all(root).unwrap();
}
5528
#[test]
fn indexes_c_graph_edges_from_tree_sitter() {
    // Indexes a single C file in which `runtime_open` calls `helper` and checks that the call
    // is recorded as a `calls_name` edge with `Syntactic` confidence.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).unwrap();

    let c_source = r#"
typedef struct Runtime Runtime;

struct Runtime {
  int state;
};

int helper(Runtime *runtime) {
  return runtime->state;
}

int runtime_open(Runtime *runtime) {
  return helper(runtime);
}
"#;
    fs::write(root.join("src/runtime.c"), c_source).unwrap();

    let config = source_config(root.clone(), Language::C);
    let db = IndexDatabase::rebuild(&config).unwrap();

    assert_edge(&db, "runtime_open", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(root).unwrap();
}
5560
#[test]
fn indexes_c_file_scope_macro_regions_for_search() {
    // A C driver file uses file-scope macro invocations (`DEVICE_API`, `DEVICE_DT_INST_DEFINE`).
    // After indexing, searching for `DEVICE_API` must return a hit in that file whose summary
    // mentions the macro.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("drivers/entropy")).unwrap();

    let driver_source = r#"
static int entropy_init(const struct device *dev)
{
    ARG_UNUSED(dev);
    return 0;
}

/* Entropy driver APIs structure */
static DEVICE_API(entropy, entropy_cryptoacc_trng_api) = {
    .get_entropy = entropy_cryptoacc_trng_get_entropy,
};

DEVICE_DT_INST_DEFINE(0, entropy_init, NULL, NULL, NULL,
                      PRE_KERNEL_1, CONFIG_ENTROPY_INIT_PRIORITY,
                      &entropy_cryptoacc_trng_api);
"#;
    fs::write(root.join("drivers/entropy/entropy.c"), driver_source).unwrap();

    // Hand-built configuration: a single C source target restricted to `drivers/entropy`.
    let c_target = ResolvedTarget {
        name: "c".to_string(),
        language: Language::C,
        directories: vec![PathBuf::from("drivers/entropy")],
        include: vec!["**/*.c".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![c_target],
        local_ai: Default::default(),
        watch: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    let hits = db.search("DEVICE_API", 5, false).unwrap();
    let macro_region_found = hits.iter().any(|hit| {
        hit.path == "drivers/entropy/entropy.c" && hit.summary.contains("DEVICE_API")
    });
    assert!(macro_region_found, "DEVICE_API hits: {hits:?}");

    fs::remove_dir_all(root).unwrap();
}
5612
#[test]
fn indexes_cpp_graph_edges_from_tree_sitter() {
    // Indexes a C++ file where the out-of-line method `Runtime::open` calls the free function
    // `helper`, and checks for the `open` -> `helper` `calls_name` edge.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).unwrap();

    let cpp_source = r#"
namespace held {
class Runtime {
public:
  void open();
};

void helper() {}

void Runtime::open() {
  helper();
}
}
"#;
    fs::write(root.join("src/runtime.cpp"), cpp_source).unwrap();

    let config = source_config(root.clone(), Language::Cpp);
    let db = IndexDatabase::rebuild(&config).unwrap();

    assert_edge(&db, "open", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(root).unwrap();
}
5643
#[test]
fn indexes_real_world_typescript_graph_patterns() {
    // Indexes the `graph-realworld/typescript` fixture, checks the edges it must produce, and
    // confirms that callee tracing with `include_references` surfaces type references.
    use crate::query::graph::GraphTraversalOptions;

    let root = fixture_temp_root("graph-realworld/typescript");
    let config = source_config(root.clone(), Language::TypeScript);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Each row is forwarded to `assert_edge` unchanged, in this order.
    let expected_edges = [
        ("src/lib.tsx", "DefaultWidget", "imports", "Syntactic"),
        ("src/lib.tsx", "WidgetNS", "imports", "NameOnly"),
        ("src/lib.tsx", "WidgetProps", "imports", "Syntactic"),
        ("src/lib.tsx", "ReExportedWidget", "exports", "NameOnly"),
        ("useWidget", "useMemo", "calls_name", "NameOnly"),
        ("useWidget", "DefaultWidget", "calls_name", "Syntactic"),
        ("Shell", "renderWidget", "calls_name", "NameOnly"),
        ("Shell", "WidgetNS", "references_type", "NameOnly"),
        ("Shell", "DefaultWidget", "references_type", "Syntactic"),
        ("DefaultWidget", "WidgetProps", "references_type", "Syntactic"),
    ];
    for (first, second, third, fourth) in expected_edges {
        assert_edge(&db, first, second, third, fourth);
    }

    // With references included, `Shell` must report its `references_type` edge to
    // `DefaultWidget`.
    let with_references = GraphTraversalOptions {
        include_references: true,
        edge_kinds: None,
        ..Default::default()
    };
    let callees = db.trace_callees_with_options("Shell", 10, &with_references).unwrap();
    let references_default_widget = callees.iter().any(|edge| {
        edge.edge_kind == "references_type"
            && edge.edge_confidence == edge.confidence
            && edge.to_symbol.as_deref().is_some_and(|name| name.ends_with("DefaultWidget"))
    });
    assert!(references_default_widget, "Shell callees: {callees:?}");

    fs::remove_dir_all(root).unwrap();
}
5682
#[test]
fn rust_macro_edges_do_not_resolve_to_same_named_modules() {
    // The crate declares `mod format;` and also invokes `format!`. The macro edge must stay an
    // unresolved, name-only `uses_macro` edge with no target symbol id.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    let lib_source = r#"
mod format;

fn execute_one() {
    let _value = format!("hello");
}
"#;
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();
    fs::write(root.join("src/format.rs"), "pub fn helper() {}\n").unwrap();

    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Inner lines of the SQL literal keep their original leading whitespace.
    let sql = "
                SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
                FROM edges
                WHERE edge_kind = 'uses_macro'
                  AND to_name = 'format'
                ";
    let (edge_kind, to_name, to_symbol_id, confidence, resolution, evidence) = db
        .storage
        .connection()
        .query_row(sql, [], |row| {
            let edge_kind = row.get::<_, String>(0)?;
            let to_name = row.get::<_, String>(1)?;
            let to_symbol_id = row.get::<_, Option<i64>>(2)?;
            let confidence = row.get::<_, String>(3)?;
            let resolution = row.get::<_, String>(4)?;
            let evidence = row.get::<_, Option<String>>(5)?;
            Ok((edge_kind, to_name, to_symbol_id, confidence, resolution, evidence))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_name, "format");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|value| value.contains("format!")));

    fs::remove_dir_all(root).unwrap();
}
5735
#[test]
fn opening_old_graph_policy_rebuilds_stale_macro_edges() {
    // Builds an index, marks its `graph_index_version` as `old`, and rewrites the `format` edge
    // into a resolved `calls_name` edge. Reopening the database must bring back the unresolved,
    // name-only `uses_macro` edge.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    let lib_source = r#"
mod format;

fn execute_one() {
    let _value = format!("hello");
}
"#;
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();
    fs::write(root.join("src/format.rs"), "pub fn helper() {}\n").unwrap();

    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Tamper with the stored index so it looks like it came from an older graph policy.
    let downgrade_version_sql =
        "UPDATE index_meta SET value = 'old' WHERE key = 'graph_index_version'";
    db.storage.connection().execute(downgrade_version_sql, []).unwrap();

    // Inner lines of the SQL literals keep their original leading whitespace.
    let corrupt_edge_sql = "
                UPDATE edges
                SET edge_kind = 'calls_name',
                    to_symbol_id = (SELECT id FROM symbols WHERE name = 'format' LIMIT 1),
                    confidence = 'Syntactic',
                    evidence = NULL,
                    resolution = 'syntactic'
                WHERE to_name = 'format'
                ";
    db.storage.connection().execute(corrupt_edge_sql, []).unwrap();
    drop(db);

    let reopened = IndexDatabase::open(&config.database).unwrap();
    let select_edge_sql = "
                SELECT edge_kind, to_symbol_id, confidence, resolution, evidence
                FROM edges
                WHERE to_name = 'format'
                  AND edge_kind = 'uses_macro'
                ";
    let (edge_kind, to_symbol_id, confidence, resolution, evidence) = reopened
        .storage
        .connection()
        .query_row(select_edge_sql, [], |row| {
            let edge_kind = row.get::<_, String>(0)?;
            let to_symbol_id = row.get::<_, Option<i64>>(1)?;
            let confidence = row.get::<_, String>(2)?;
            let resolution = row.get::<_, String>(3)?;
            let evidence = row.get::<_, Option<String>>(4)?;
            Ok((edge_kind, to_symbol_id, confidence, resolution, evidence))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|value| value.contains("format!")));

    fs::remove_dir_all(root).unwrap();
}
5807
#[test]
fn qualified_common_member_calls_do_not_resolve_by_short_name() {
    // The crate defines `AlertsStore::new`, while `caller` only invokes `Vec::new()`. The stored
    // edge must keep the qualified target `Vec::new` and stay unresolved rather than linking to
    // the local `new`.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    let lib_source = r#"
pub struct AlertsStore;

impl AlertsStore {
    pub fn new() -> Self {
        Self
    }
}

pub fn caller() {
    let _items: Vec<String> = Vec::new();
}
"#;
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();

    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Inner lines of the SQL literal keep their original leading whitespace.
    let sql = "
                SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution
                FROM edges
                WHERE from_name LIKE '%caller'
                  AND edge_kind = 'calls_name'
                  AND to_name = 'new'
                ";
    let (to_name, target_qualified_name, to_symbol_id, confidence, resolution) = db
        .storage
        .connection()
        .query_row(sql, [], |row| {
            let to_name = row.get::<_, String>(0)?;
            let target_qualified_name = row.get::<_, Option<String>>(1)?;
            let to_symbol_id = row.get::<_, Option<i64>>(2)?;
            let confidence = row.get::<_, String>(3)?;
            let resolution = row.get::<_, String>(4)?;
            Ok((to_name, target_qualified_name, to_symbol_id, confidence, resolution))
        })
        .unwrap();

    assert_eq!(to_name, "new");
    assert_eq!(target_qualified_name.as_deref(), Some("Vec::new"));
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");

    fs::remove_dir_all(root).unwrap();
}
5864
#[test]
fn macro_edges_do_not_resolve_to_same_named_typescript_symbols() {
    // A Rust file invokes `json!` while a TypeScript file in the same directory exports a
    // function named `json`. The macro edge must stay unresolved instead of pointing at the
    // TypeScript symbol.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    let rust_source = r#"
fn rust_entry() {
    let _payload = json!({"ok": true});
}
"#;
    fs::write(root.join("src/lib.rs"), rust_source).unwrap();
    fs::write(root.join("src/preferences.ts"), "export function json() { return {}; }\n")
        .unwrap();

    // Start from the Rust source configuration and add a TypeScript target over the same `src`.
    let typescript_target = ResolvedTarget {
        name: "typescript".to_string(),
        language: Language::TypeScript,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.ts".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let mut config = source_config(root.clone(), Language::Rust);
    config.targets.push(typescript_target);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Inner lines of the SQL literal keep their original leading whitespace.
    let sql = "
                SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
                FROM edges
                WHERE edge_kind = 'uses_macro'
                  AND to_name = 'json'
                ";
    let (edge_kind, to_name, to_symbol_id, confidence, resolution, evidence) = db
        .storage
        .connection()
        .query_row(sql, [], |row| {
            let edge_kind = row.get::<_, String>(0)?;
            let to_name = row.get::<_, String>(1)?;
            let to_symbol_id = row.get::<_, Option<i64>>(2)?;
            let confidence = row.get::<_, String>(3)?;
            let resolution = row.get::<_, String>(4)?;
            let evidence = row.get::<_, Option<String>>(5)?;
            Ok((edge_kind, to_name, to_symbol_id, confidence, resolution, evidence))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_name, "json");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|value| value.contains("json!")));

    fs::remove_dir_all(root).unwrap();
}
5924
#[test]
fn qualified_crate_helper_callers_use_name_fallback() {
    // `first` calls the helper through `crate::task_spawn::…` and `second` through
    // `task_spawn::…`. Both must be reported as `calls_name` callers of `spawn_blocking`, and
    // the `first` edge must carry the `target_name_fallback` resolution.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    let lib_source = r#"
pub mod task_spawn {
    pub fn spawn_blocking() {}
}

pub fn first() {
    crate::task_spawn::spawn_blocking();
}

pub fn second() {
    task_spawn::spawn_blocking();
}
"#;
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();

    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let callers = db.find_callers("spawn_blocking", 10).unwrap();

    let first_uses_fallback = callers.iter().any(|edge| {
        edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("first"))
            && edge.edge_kind == "calls_name"
            && edge.resolution == "target_name_fallback"
    });
    assert!(first_uses_fallback, "spawn_blocking callers: {callers:?}");

    let second_is_reported = callers.iter().any(|edge| {
        edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("second"))
            && edge.edge_kind == "calls_name"
    });
    assert!(second_is_reported, "spawn_blocking callers: {callers:?}");

    fs::remove_dir_all(root).unwrap();
}
5969
#[test]
fn caller_lookup_does_not_match_related_names_or_chain_evidence() {
    // The fixture has `spawn_blocking` next to similarly named functions (`spawn`,
    // `spawn_blocking_handle`, `spawn_blocking_offload`) and a chained `.map_err()` call. A
    // caller lookup for `spawn_blocking`, by short name or by `src/lib.rs::spawn_blocking`, must
    // return `direct` and none of the look-alike callers or targets.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    let lib_source = r#"
pub mod runtime {
    pub mod task_spawn {
        pub fn spawn() {}
        pub fn spawn_blocking() -> JoinHandle {
            JoinHandle
        }
        pub fn spawn_blocking_handle() {}
        pub fn spawn_blocking_offload() -> JoinHandle {
            JoinHandle
        }
    }
}

pub struct JoinHandle;

impl JoinHandle {
    pub fn map_err(self) {}
}

pub fn direct() {
    crate::runtime::task_spawn::spawn_blocking();
}

pub fn related_handle() {
    crate::runtime::task_spawn::spawn_blocking_handle();
}

pub fn related_offload_chain() {
    crate::runtime::task_spawn::spawn_blocking_offload().map_err();
}

pub fn related_spawn_with_text() {
    crate::runtime::task_spawn::spawn();
}
"#;
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();

    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Lookup by short name.
    let callers = db.find_callers("spawn_blocking", 20).unwrap();
    let direct_found = callers.iter().any(|edge| {
        edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
            && edge.target.as_deref() == Some("spawn_blocking")
            && edge.edge_kind == "calls_name"
    });
    assert!(direct_found, "spawn_blocking callers: {callers:?}");
    for edge in callers.iter() {
        let from_related_caller = edge.from_symbol.as_deref().is_some_and(|name| {
            name.ends_with("related_handle")
                || name.ends_with("related_offload_chain")
                || name.ends_with("related_spawn_with_text")
        });
        let targets_related_name = matches!(
            edge.target.as_deref(),
            Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
        );
        assert!(
            !(from_related_caller || targets_related_name),
            "caller lookup leaked related names or chain evidence: {callers:?}"
        );
    }

    // Lookup by file-qualified name: same expectations.
    let qualified_callers = db.find_callers("src/lib.rs::spawn_blocking", 20).unwrap();
    let qualified_direct_found = qualified_callers.iter().any(|edge| {
        edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
            && edge.target.as_deref() == Some("spawn_blocking")
            && edge.edge_kind == "calls_name"
    });
    assert!(qualified_direct_found, "qualified spawn_blocking callers: {qualified_callers:?}");
    for edge in qualified_callers.iter() {
        let from_related_caller = edge.from_symbol.as_deref().is_some_and(|name| {
            name.ends_with("related_handle")
                || name.ends_with("related_offload_chain")
                || name.ends_with("related_spawn_with_text")
        });
        let targets_related_name = matches!(
            edge.target.as_deref(),
            Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
        );
        assert!(
            !(from_related_caller || targets_related_name),
            "qualified caller lookup leaked related names or chain evidence: {qualified_callers:?}"
        );
    }

    fs::remove_dir_all(root).unwrap();
}
6066
#[test]
fn files_past_the_old_structural_cap_still_contribute_symbols_and_edges() {
    // Generates a `lib.rs` padded with 700 filler functions (more than 10_000 bytes) so that
    // `caller` sits beyond line 700, then checks that its symbol, its call edge and the derived
    // impact row are all still produced.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    let mut filler = String::new();
    for idx in 0..700 {
        filler.push_str(&format!("pub fn filler_{idx}() {{}}\n"));
    }
    let lib_source = format!(
        r#"
pub mod task_spawn {{
    pub fn spawn_blocking() {{}}
}}

{filler}

pub fn caller() {{
    crate::task_spawn::spawn_blocking();
}}
"#
    );
    let lib_path = root.join("src/lib.rs");
    fs::write(&lib_path, lib_source).unwrap();

    let config = source_config(root.clone(), Language::Rust);
    // Sanity check on the fixture itself: the file must be larger than 10_000 bytes.
    let lib_size = fs::metadata(root.join("src/lib.rs")).unwrap().len();
    assert!(lib_size > 10_000);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
    let caller_is_indexed = symbols.iter().any(|symbol| symbol.name == "caller");
    assert!(caller_is_indexed, "caller symbols: {symbols:?}");

    // The call edge must carry a callsite located after the filler block.
    let callers = db.find_callers("spawn_blocking", 10).unwrap();
    let late_call_found = callers.iter().any(|edge| {
        edge.edge_kind == "calls_name"
            && edge.target.as_deref() == Some("spawn_blocking")
            && edge.callsite.as_ref().is_some_and(|callsite| callsite.line > 700)
    });
    assert!(late_call_found, "spawn_blocking callers: {callers:?}");

    let impact =
        db.impact_surface("callers of crate::task_spawn::spawn_blocking in src", 10).unwrap();
    let direct_caller_found = impact.iter().any(|item| {
        item.category == "Direct structural impact" && item.reason == "direct_caller"
    });
    assert!(direct_caller_found, "impact: {impact:?}");

    fs::remove_dir_all(root).unwrap();
}
6120
#[test]
fn impact_surface_uses_high_signal_query_symbols_and_call_edges() {
    // Runs a free-text impact query that mentions both the broad token `runtime` and the
    // specific `spawn_blocking`. The result must contain the direct caller, must not seed an
    // exact definition from `runtime`, and must not surface type references.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    let lib_source = r#"
pub mod runtime {
    pub fn unrelated_runtime_symbol() {}
}

pub mod task_spawn {
    pub fn spawn_blocking<F, T>(f: F) -> T
    where
        F: FnOnce() -> T + Send + 'static,
        T: Send + 'static,
    {
        f()
    }
}

pub fn caller() {
    crate::task_spawn::spawn_blocking(|| 1);
}
"#;
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();

    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();
    let query = "change runtime task_spawn spawn_blocking wasm inline native blocking pool";
    let impact = db.impact_surface(query, 20).unwrap();

    let caller_is_direct_impact = impact.iter().any(|item| {
        item.category == "Direct structural impact"
            && item.reason == "direct_caller"
            && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
    });
    assert!(caller_is_direct_impact, "spawn_blocking caller should be present: {impact:?}");

    let runtime_became_exact_seed = impact.iter().any(|item| {
        item.reason == "exact_symbol_definition"
            && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("runtime"))
    });
    assert!(
        !runtime_became_exact_seed,
        "broad `runtime` token should not become an exact impact seed: {impact:?}"
    );

    for item in impact.iter() {
        let cites_type_reference =
            item.evidence.iter().any(|evidence| evidence.contains("references_type"));
        let names_send_bound = item.symbol.as_deref() == Some("Send");
        assert!(
            !(cites_type_reference || names_send_bound),
            "type references should not appear as direct impact: {impact:?}"
        );
    }

    fs::remove_dir_all(root).unwrap();
}
6182
#[test]
fn impact_surface_collapses_file_matches_to_one_row_per_file() {
    // Regression for #48: file-granularity matches (path or chunk text) previously produced one
    // row for every symbol in the file. Each of the sections checked below must now contain at
    // most one row per file.
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    let store_source = "pub fn widget_alpha() {}\npub fn widget_beta() {}\n\
         pub fn widget_gamma() {}\npub fn widget_delta() {}\n";
    fs::write(root.join("src/widget_store.rs"), store_source).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("widget_alpha".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: false,
        limit: 10,
    };
    let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("symbol");
    let default_options = crate::query::impact::ImpactSurfaceOptions::default();
    let report = db
        .impact_surface_report_for_selected_symbol(&symbol, 50, &default_options)
        .unwrap();

    let file_level_sections = [
        &report.text_fallback_hits,
        &report.tests_touching_symbol_path,
        &report.docs_mentioning_symbol_path,
    ];
    for section in file_level_sections {
        // As many distinct paths as rows means no file appears twice.
        let distinct_paths: std::collections::BTreeSet<&str> =
            section.iter().map(|item| item.path.as_str()).collect();
        assert_eq!(
            distinct_paths.len(),
            section.len(),
            "section must have one row per file: {section:?}"
        );

        // Precedence: a row produced by a path match must not name a symbol. Qualified names
        // have the form `path::symbol`, so a path needle would match every symbol in the file.
        let path_matched_rows = section.iter().filter(|item| {
            item.evidence.iter().any(|evidence| evidence.starts_with("path match"))
        });
        for item in path_matched_rows {
            assert!(item.symbol.is_none(), "path match must not name a symbol: {item:?}");
        }
    }

    let mut store_rows = 0;
    for item in report.text_fallback_hits.iter() {
        if item.path.ends_with("widget_store.rs") {
            store_rows += 1;
        }
    }
    assert_eq!(store_rows, 1, "a file with four symbols collapses to one fallback row");

    fs::remove_dir_all(root).unwrap();
}
6246
6247    #[test]
6248    fn docs_for_symbol_prefers_local_source_context_before_broad_markdown() {
6249        let root = unique_temp_root();
6250        let _ = fs::remove_dir_all(&root);
6251        fs::create_dir_all(root.join("src/runtime")).unwrap();
6252        fs::create_dir_all(root.join("docs")).unwrap();
6253        fs::write(
6254            root.join("src/runtime/task_spawn.rs"),
6255            r#"
6256pub fn spawn_blocking<F, T>(f: F) -> T
6257where
6258    F: FnOnce() -> T + Send + 'static,
6259    T: Send + 'static,
6260{
6261    f()
6262}
6263"#,
6264        )
6265        .unwrap();
6266        fs::write(
6267            root.join("docs/phrase-persistence.md"),
6268            "# Phrase persistence\nUnrelated notes mention spawn_blocking in passing.\n",
6269        )
6270        .unwrap();
6271        fs::write(
6272            root.join("docs/task_spawn.md"),
6273            "# task_spawn\nLocal task_spawn notes explain spawn_blocking.\n",
6274        )
6275        .unwrap();
6276        let config = Config {
6277            root: root.clone(),
6278            database: root.join(".rag-rat/index.sqlite"),
6279            targets: vec![
6280                ResolvedTarget {
6281                    name: "rust".to_string(),
6282                    language: Language::Rust,
6283                    directories: vec![PathBuf::from("src")],
6284                    include: vec!["src/".to_string()],
6285                    exclude: Vec::new(),
6286                    kind: TargetKind::Source,
6287                },
6288                ResolvedTarget {
6289                    name: "markdown".to_string(),
6290                    language: Language::Markdown,
6291                    directories: vec![PathBuf::from("docs")],
6292                    include: vec!["**/*.md".to_string()],
6293                    exclude: Vec::new(),
6294                    kind: TargetKind::Docs,
6295                },
6296            ],
6297            local_ai: Default::default(),
6298            watch: Default::default(),
6299        };
6300        let db = IndexDatabase::rebuild(&config).unwrap();
6301        let symbol = db.symbols("spawn_blocking", Some(Language::Rust), 10).unwrap().remove(0);
6302        let hits = db.docs_for_selected_symbol(&symbol, 10).unwrap();
6303        assert_eq!(hits[0].path, "src/runtime/task_spawn.rs", "docs hits: {hits:?}");
6304        let phrase_index = hits.iter().position(|hit| hit.path == "docs/phrase-persistence.md");
6305        let task_spawn_index = hits.iter().position(|hit| hit.path == "docs/task_spawn.md");
6306        assert!(
6307            phrase_index.is_none_or(|phrase| task_spawn_index.is_some_and(|local| local < phrase)),
6308            "path-local task_spawn docs should outrank unrelated phrase docs: {hits:?}"
6309        );
6310
6311        fs::remove_dir_all(root).unwrap();
6312    }
6313
6314    #[test]
6315    fn partial_tree_sitter_trees_still_contribute_valid_symbols_and_edges() {
6316        let root = unique_temp_root();
6317        let _ = fs::remove_dir_all(&root);
6318        fs::create_dir_all(root.join("src")).unwrap();
6319        fs::write(
6320            root.join("src/lib.rs"),
6321            r#"
6322pub fn helper() {}
6323
6324pub fn caller() {
6325    helper();
6326}
6327
6328fn broken( {
6329"#,
6330        )
6331        .unwrap();
6332        let config = source_config(root.clone(), Language::Rust);
6333        let db = IndexDatabase::rebuild(&config).unwrap();
6334
6335        let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
6336        assert!(
6337            symbols.iter().any(|symbol| symbol.name == "caller"),
6338            "caller symbols: {symbols:?}"
6339        );
6340        assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
6341
6342        fs::remove_dir_all(root).unwrap();
6343    }
6344
6345    #[test]
6346    fn receiver_method_calls_do_not_bind_to_same_named_free_functions() {
6347        let root = unique_temp_root();
6348        let _ = fs::remove_dir_all(&root);
6349        fs::create_dir_all(root.join("src")).unwrap();
6350        fs::write(
6351            root.join("src/lib.rs"),
6352            r#"
6353pub fn spawn_blocking() {}
6354
6355pub fn caller(joinset: JoinSet) {
6356    joinset.spawn_blocking();
6357}
6358
6359pub struct JoinSet;
6360"#,
6361        )
6362        .unwrap();
6363        let config = source_config(root.clone(), Language::Rust);
6364        let db = IndexDatabase::rebuild(&config).unwrap();
6365
6366        let edge = db
6367            .storage
6368            .connection()
6369            .query_row(
6370                "
6371                SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution, receiver_hint
6372                FROM edges
6373                WHERE from_name LIKE '%caller'
6374                  AND edge_kind = 'calls_name'
6375                  AND to_name = 'spawn_blocking'
6376                ",
6377                [],
6378                |row| {
6379                    Ok((
6380                        row.get::<_, String>(0)?,
6381                        row.get::<_, Option<String>>(1)?,
6382                        row.get::<_, Option<i64>>(2)?,
6383                        row.get::<_, String>(3)?,
6384                        row.get::<_, String>(4)?,
6385                        row.get::<_, Option<String>>(5)?,
6386                    ))
6387                },
6388            )
6389            .unwrap();
6390        assert_eq!(edge.0, "spawn_blocking");
6391        assert_eq!(edge.1.as_deref(), Some("joinset::spawn_blocking"));
6392        assert_eq!(edge.2, None);
6393        assert_eq!(edge.3, "NameOnly");
6394        assert_eq!(edge.4, "unresolved");
6395        assert_eq!(edge.5.as_deref(), Some("joinset"));
6396
6397        fs::remove_dir_all(root).unwrap();
6398    }
6399
6400    #[test]
6401    fn trace_callees_excludes_type_references_by_default() {
6402        let root = unique_temp_root();
6403        let _ = fs::remove_dir_all(&root);
6404        fs::create_dir_all(root.join("src")).unwrap();
6405        fs::write(
6406            root.join("src/lib.rs"),
6407            r#"
6408pub struct JoinError;
6409pub enum Result<T, E> { Ok(T), Err(E) }
6410pub fn helper() {}
6411
6412pub fn spawn_blocking<F, T>(f: F) -> Result<T, JoinError>
6413where
6414    F: FnOnce() -> T + Send + 'static,
6415    T: Send + 'static,
6416{
6417    helper();
6418    tokio::task::spawn_blocking(f)
6419}
6420"#,
6421        )
6422        .unwrap();
6423        let config = source_config(root.clone(), Language::Rust);
6424        let db = IndexDatabase::rebuild(&config).unwrap();
6425
6426        let default_callees = db.trace_callees("spawn_blocking", 20).unwrap();
6427        assert!(
6428            default_callees.iter().any(|edge| {
6429                edge.edge_kind == "calls_name"
6430                    && edge.target.as_deref() == Some("helper")
6431                    && edge.verified_target_symbol
6432            }),
6433            "default callees: {default_callees:?}"
6434        );
6435        assert!(
6436            default_callees
6437                .iter()
6438                .all(|edge| edge.target_qualified_name.as_deref()
6439                    != Some("tokio::task::spawn_blocking")),
6440            "default callees leaked unresolved external call: {default_callees:?}"
6441        );
6442        assert!(
6443            default_callees.iter().all(|edge| edge.edge_kind != "references_type"),
6444            "default callees leaked type refs: {default_callees:?}"
6445        );
6446        assert!(
6447            default_callees.iter().all(|edge| !matches!(
6448                edge.target.as_deref(),
6449                Some("F" | "T" | "Send" | "Result" | "JoinError")
6450            )),
6451            "default callees leaked generic/type targets: {default_callees:?}"
6452        );
6453
6454        let with_refs = db
6455            .trace_callees_with_options(
6456                "spawn_blocking",
6457                20,
6458                &crate::query::graph::GraphTraversalOptions {
6459                    include_references: true,
6460                    edge_kinds: None,
6461                    ..Default::default()
6462                },
6463            )
6464            .unwrap();
6465        assert!(
6466            with_refs.iter().any(|edge| edge.edge_kind == "references_type"),
6467            "reference-enabled callees: {with_refs:?}"
6468        );
6469
6470        let with_unresolved = db
6471            .trace_callees_with_options(
6472                "spawn_blocking",
6473                20,
6474                &crate::query::graph::GraphTraversalOptions {
6475                    include_unresolved: true,
6476                    ..Default::default()
6477                },
6478            )
6479            .unwrap();
6480        assert!(
6481            with_unresolved
6482                .iter()
6483                .any(|edge| edge.target_qualified_name.as_deref()
6484                    == Some("tokio::task::spawn_blocking")),
6485            "unresolved-enabled callees: {with_unresolved:?}"
6486        );
6487
6488        fs::remove_dir_all(root).unwrap();
6489    }
6490
6491    #[test]
6492    fn trace_callees_defaults_to_repo_relevant_calls() {
6493        let root = unique_temp_root();
6494        let _ = fs::remove_dir_all(&root);
6495        fs::create_dir_all(root.join("src")).unwrap();
6496        fs::write(
6497            root.join("src/lib.rs"),
6498            r#"
6499pub fn repo_helper() {}
6500
6501pub fn caller(input: Result<String, String>) -> String {
6502    repo_helper();
6503    let values: Vec<String> = Vec::new();
6504    let _ = input.map_err(|error| error.to_string());
6505    let _ = Some("value").unwrap_or_else(|| "fallback");
6506    let _ = format!("hello");
6507    values.get(0).unwrap_or_else(|| "fallback").to_string()
6508}
6509"#,
6510        )
6511        .unwrap();
6512        let config = source_config(root.clone(), Language::Rust);
6513        let db = IndexDatabase::rebuild(&config).unwrap();
6514
6515        let default_callees = db.trace_callees("caller", 20).unwrap();
6516        assert!(
6517            default_callees.iter().any(|edge| edge.target.as_deref() == Some("repo_helper")),
6518            "default callees should keep repo-local calls: {default_callees:?}"
6519        );
6520        assert!(
6521            default_callees.iter().all(|edge| {
6522                edge.edge_kind != "uses_macro"
6523                    && !matches!(
6524                        edge.target.as_deref(),
6525                        Some("new" | "map_err" | "unwrap_or_else" | "to_string" | "format")
6526                    )
6527            }),
6528            "default callees leaked low-signal calls: {default_callees:?}"
6529        );
6530
6531        let expanded = db
6532            .trace_callees_with_options(
6533                "caller",
6534                20,
6535                &crate::query::graph::GraphTraversalOptions {
6536                    include_unresolved: true,
6537                    include_macros: true,
6538                    include_common_methods: true,
6539                    ..Default::default()
6540                },
6541            )
6542            .unwrap();
6543        assert!(
6544            expanded.iter().any(|edge| edge.edge_kind == "uses_macro"),
6545            "macro-enabled callees: {expanded:?}"
6546        );
6547        assert!(
6548            expanded.iter().any(|edge| edge.target.as_deref() == Some("unwrap_or_else")),
6549            "common-method-enabled callees: {expanded:?}"
6550        );
6551
6552        fs::remove_dir_all(root).unwrap();
6553    }
6554
6555    #[test]
6556    fn indexes_kotlin_graph_edges_from_tree_sitter() {
6557        let root = unique_temp_root();
6558        let _ = fs::remove_dir_all(&root);
6559        fs::create_dir_all(root.join("src")).unwrap();
6560        fs::write(
6561            root.join("src/Main.kt"),
6562            r#"
6563package dev.cq27.test
6564
6565import dev.cq27.lib.ExternalThing
6566
6567interface Syncable
6568
6569class MainBridge : Syncable {
6570  suspend fun syncOnce() {
6571    helper()
6572    ExternalThing()
6573  }
6574}
6575
6576fun helper() {}
6577"#,
6578        )
6579        .unwrap();
6580        let config = source_config(root.clone(), Language::Kotlin);
6581        let db = IndexDatabase::rebuild(&config).unwrap();
6582
6583        assert_edge(&db, "syncOnce", "helper", "calls_name", "Syntactic");
6584        assert_edge(&db, "MainBridge", "Syncable", "implements", "Syntactic");
6585        assert_edge(&db, "src/Main.kt", "ExternalThing", "imports", "NameOnly");
6586        let impact = db.impact_surface("helper", 10).unwrap();
6587        assert!(
6588            impact.iter().any(|item| {
6589                item.category == "Direct structural impact" && item.reason == "direct_caller"
6590            }),
6591            "impact: {impact:?}"
6592        );
6593
6594        fs::remove_dir_all(root).unwrap();
6595    }
6596
6597    #[test]
6598    fn indexes_real_world_kotlin_graph_patterns() {
6599        let root = fixture_temp_root("graph-realworld/kotlin");
6600        let config = source_config(root.clone(), Language::Kotlin);
6601        let db = IndexDatabase::rebuild(&config).unwrap();
6602
6603        assert_edge(&db, "src/Main.kt", "ExternalFactory", "imports", "NameOnly");
6604        assert_edge(&db, "Worker", "companion", "contains", "Exact");
6605        assert_edge(&db, "companion", "create", "contains", "Exact");
6606        assert_edge(&db, "syncOnce", "create", "calls_name", "Syntactic");
6607        assert_edge(&db, "syncOnce", "Worker", "references_type", "Syntactic");
6608        assert_edge(&db, "syncOnce", "run", "calls_name", "Syntactic");
6609        assert_edge(&db, "syncOnce", "SingletonRunner", "references_type", "Syntactic");
6610        assert_edge(&db, "syncOnce", "ExternalFactory", "calls_name", "NameOnly");
6611        assert_edge(&db, "syncOnce", "ExternalFactory", "references_type", "NameOnly");
6612        assert_edge(&db, "syncOnce", "cleaned", "calls_name", "Syntactic");
6613        let callers = db.find_callers("cleaned", 10).unwrap();
6614        assert!(
6615            callers.iter().any(|edge| {
6616                edge.edge_kind == "calls_name"
6617                    && edge.edge_confidence == edge.confidence
6618                    && edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("syncOnce"))
6619            }),
6620            "cleaned callers: {callers:?}"
6621        );
6622
6623        fs::remove_dir_all(root).unwrap();
6624    }
6625
6626    #[test]
6627    fn kotlin_caller_lookup_respects_qualified_receivers_for_common_method_names() {
6628        let root = unique_temp_root();
6629        let _ = fs::remove_dir_all(&root);
6630        fs::create_dir_all(root.join("src")).unwrap();
6631        fs::write(
6632            root.join("src/Main.kt"),
6633            r#"
6634package dev.cq27.test
6635
6636object WatchProposalBuilder {
6637  fun build(): String = "proposal"
6638}
6639
6640class AndroidDialogBuilder {
6641  fun build(): String = "dialog"
6642}
6643
6644fun actualCaller() {
6645  WatchProposalBuilder.build()
6646}
6647
6648fun unrelatedBuilderCalls(dialog: AndroidDialogBuilder) {
6649  dialog.build()
6650  AndroidDialogBuilder().build()
6651}
6652"#,
6653        )
6654        .unwrap();
6655        let config = source_config(root.clone(), Language::Kotlin);
6656        let db = IndexDatabase::rebuild(&config).unwrap();
6657        let target = db
6658            .symbols("build", Some(Language::Kotlin), 10)
6659            .unwrap()
6660            .into_iter()
6661            .find(|symbol| symbol.qualified_name.contains("WatchProposalBuilder"))
6662            .expect("WatchProposalBuilder.build symbol");
6663        let callers = db
6664            .find_callers_with_options(
6665                "build",
6666                20,
6667                &crate::query::graph::GraphTraversalOptions {
6668                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
6669                    symbol_id: Some(target.symbol_id),
6670                    ..Default::default()
6671                },
6672            )
6673            .unwrap();
6674        assert_eq!(
6675            callers
6676                .iter()
6677                .filter(|edge| edge
6678                    .from_symbol
6679                    .as_deref()
6680                    .is_some_and(|name| name.ends_with("actualCaller")))
6681                .count(),
6682            1,
6683            "actual caller should be present once: {callers:?}"
6684        );
6685        assert!(
6686            callers.iter().all(|edge| edge
6687                .from_symbol
6688                .as_deref()
6689                .is_none_or(|name| !name.ends_with("unrelatedBuilderCalls"))),
6690            "unrelated builder calls should not resolve to WatchProposalBuilder.build: {callers:?}"
6691        );
6692
6693        fs::remove_dir_all(root).unwrap();
6694    }
6695
6696    #[test]
6697    fn github_sync_caches_papertrail_and_rationale_without_query_time_crawling() {
6698        let (root, config) =
6699            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
6700        let db = IndexDatabase::rebuild(&config).unwrap();
6701        let mock = MockGitHubClient;
6702
6703        let offline =
6704            github::sync_from_refs::<MockGitHubClient>(db.storage.connection(), &root, None, true)
6705                .unwrap();
6706        assert!(offline.offline);
6707        assert_eq!(offline.discovered_refs, 1);
6708        assert_eq!(offline.synced_items, 0);
6709
6710        let report =
6711            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6712        assert!(!report.offline);
6713        assert_eq!(report.discovered_refs, 1);
6714        assert_eq!(report.synced_items, 5);
6715        assert_eq!(report.status.issues, 1);
6716        assert_eq!(report.status.comments, 1);
6717        assert_eq!(report.status.pulls, 1);
6718        assert_eq!(report.status.reviews, 1);
6719        assert_eq!(report.status.review_comments, 1);
6720
6721        let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
6722        assert_eq!(issue_hits.len(), 1);
6723        assert_eq!(issue_hits[0].classification, "decision");
6724        assert_eq!(issue_hits[0].evidence_kind, "historical_github");
6725
6726        let refs = db.github_refs_for_path("docs/search.md", 10).unwrap();
6727        assert_eq!(refs.len(), 1);
6728        assert_eq!(refs[0].source_kind, "file");
6729
6730        let rationale = db.rationale_search("risk", 10).unwrap();
6731        assert!(rationale.iter().any(|item| item.classification == "risk"));
6732        let issue_ref_rationale = db.rationale_search("Fixes #42", 10).unwrap();
6733        assert_eq!(issue_ref_rationale.first().map(|item| item.number), Some(42));
6734        assert_eq!(
6735            issue_ref_rationale.first().map(|item| item.evidence_kind),
6736            Some("literal_github_ref")
6737        );
6738        assert_eq!(issue_ref_rationale.first().map(|item| item.score), Some(1.0));
6739        assert!(
6740            issue_ref_rationale.iter().any(|item| item.number == 42),
6741            "issue ref rationale should use structured GitHub refs: {issue_ref_rationale:?}"
6742        );
6743
6744        let chunk_id = first_chunk_id(&db);
6745        let papertrail = db.papertrail_for_chunk(chunk_id, 10).unwrap().unwrap();
6746        assert!(papertrail.current_source.is_some());
6747        assert!(!papertrail.github_evidence.is_empty());
6748        assert!(papertrail.github_evidence.iter().all(|item| {
6749            matches!(item.evidence_kind, "historical_github" | "literal_github_ref")
6750        }));
6751
6752        fs::remove_dir_all(root).unwrap();
6753    }
6754
6755    #[test]
6756    fn papertrail_for_commit_prefers_commit_sourced_github_refs() {
6757        let root = unique_temp_root();
6758        let _ = fs::remove_dir_all(&root);
6759        fs::create_dir_all(root.join("docs")).unwrap();
6760        run_git(&root, &["init"]);
6761        run_git(&root, &["config", "user.name", "Rag Rat"]);
6762        run_git(&root, &["config", "user.email", "rag@example.com"]);
6763        fs::write(root.join("docs/search.md"), "# Decision\nalpha\n").unwrap();
6764        run_git(&root, &["add", "."]);
6765        run_git(&root, &["commit", "-m", "Fix search rationale", "-m", "Fixes #42"]);
6766
6767        let config = markdown_config_for_root(root.clone());
6768        let db = IndexDatabase::rebuild(&config).unwrap();
6769        let commit = db
6770            .storage
6771            .connection()
6772            .query_row("SELECT hash FROM git_commits LIMIT 1", [], |row| row.get::<_, String>(0))
6773            .unwrap();
6774        let mock = MockGitHubClient;
6775        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6776
6777        let papertrail = db.papertrail_for_commit(&commit[..7], 10).unwrap();
6778        assert_eq!(papertrail.github_evidence.first().map(|item| item.number), Some(42));
6779        assert_eq!(
6780            papertrail.github_evidence.first().map(|item| item.evidence_kind),
6781            Some("literal_github_ref")
6782        );
6783        assert!(
6784            papertrail.fallback_github_evidence.is_empty(),
6785            "structured commit refs should suppress noisy fallback evidence: {papertrail:?}"
6786        );
6787
6788        fs::remove_dir_all(root).unwrap();
6789    }
6790
6791    #[test]
6792    fn papertrail_for_symbol_dedupes_duplicate_file_refs() {
6793        let root = unique_temp_root();
6794        let _ = fs::remove_dir_all(&root);
6795        fs::create_dir_all(root.join("src")).unwrap();
6796        fs::write(
6797            root.join("src/lib.rs"),
6798            "// First rationale (#42)\n// Second rationale (#42)\npub fn tracked_symbol() {}\n",
6799        )
6800        .unwrap();
6801        let config = source_config(root.clone(), Language::Rust);
6802        let db = IndexDatabase::rebuild(&config).unwrap();
6803        let mock = MockGitHubClient;
6804        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6805        let papertrail = db
6806            .papertrail_for_symbol("tracked_symbol", Some(Language::Rust), 10)
6807            .unwrap()
6808            .expect("tracked symbol papertrail");
6809
6810        assert_eq!(
6811            papertrail
6812                .github_evidence
6813                .iter()
6814                .filter(|item| item.number == 42 && item.item_kind == "issue")
6815                .count(),
6816            1,
6817            "duplicate #42 refs in one file should collapse to one issue evidence row: {papertrail:?}"
6818        );
6819
6820        fs::remove_dir_all(root).unwrap();
6821    }
6822
6823    #[test]
6824    fn github_sync_keeps_partial_cache_and_skips_synced_refs_after_404() {
6825        let (root, config) = markdown_config(
6826            "# Decision\nRefs cq27-dev/rag-rat#42 and cq27-dev/rag-rat#404\nwe will keep sqlite\n",
6827        );
6828        let db = IndexDatabase::rebuild(&config).unwrap();
6829        let mock = PartiallyFailingGitHubClient;
6830
6831        let report =
6832            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6833        assert_eq!(report.discovered_refs, 2);
6834        assert_eq!(report.synced_items, 5);
6835        assert_eq!(report.failed_refs, 1);
6836        assert_eq!(report.errors.len(), 1);
6837        assert_eq!(report.errors[0].number, 404);
6838        assert_eq!(report.errors[0].status, "not_found");
6839
6840        let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
6841        assert_eq!(issue_hits.len(), 1);
6842        assert_eq!(issue_hits[0].number, 42);
6843
6844        let second =
6845            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6846        assert_eq!(second.synced_items, 0);
6847        assert_eq!(second.skipped_refs, 2);
6848        assert_eq!(second.failed_refs, 0);
6849
6850        fs::remove_dir_all(root).unwrap();
6851    }
6852
6853    #[test]
6854    fn search_recovers_when_fts_is_marked_dirty() {
6855        let (root, config) = markdown_config("alpha token");
6856        let db = IndexDatabase::rebuild(&config).unwrap();
6857        db.mark_fts_dirty().unwrap();
6858
6859        let dirty = db.status(&config.database).unwrap();
6860        assert!(dirty.fts_dirty);
6861        assert!(!dirty.fts_fresh);
6862
6863        let hits = db.search("alpha", 10, false).unwrap();
6864        assert_eq!(hits.len(), 1);
6865        assert_eq!(hits[0].summary, "alpha token");
6866        let fresh = db.status(&config.database).unwrap();
6867        assert!(!fresh.fts_dirty);
6868        assert!(fresh.fts_fresh);
6869
6870        fs::remove_dir_all(root).unwrap();
6871    }
6872
6873    #[test]
6874    fn read_chunk_relocates_small_line_drift_to_current_text() {
6875        let (root, config) = markdown_config("# Title\nalpha token\n");
6876        let db = IndexDatabase::rebuild(&config).unwrap();
6877        let chunk_id = first_chunk_id(&db);
6878        fs::write(root.join("docs/search.md"), "inserted\n# Title\nalpha token\n").unwrap();
6879
6880        let chunk = db.read_chunk(chunk_id).unwrap().unwrap();
6881        assert_eq!(chunk.start_line, 2);
6882        assert_eq!(chunk.end_line, 3);
6883        assert_eq!(chunk.text, "# Title\nalpha token\n");
6884
6885        fs::remove_dir_all(root).unwrap();
6886    }
6887
6888    #[test]
6889    fn read_chunk_large_drift_reindexes_and_reports_stale_chunk() {
6890        let (root, config) = markdown_config("# Title\nalpha token\n");
6891        let db = IndexDatabase::rebuild(&config).unwrap();
6892        let chunk_id = first_chunk_id(&db);
6893        fs::write(root.join("docs/search.md"), "# Replacement\nbeta token\n").unwrap();
6894
6895        let err = db.read_chunk(chunk_id).unwrap_err().to_string();
6896        assert!(err.contains("StaleChunk"), "{err}");
6897        let hits = db.search("beta", 10, false).unwrap();
6898        assert_eq!(hits.len(), 1);
6899        assert!(db.search("alpha", 10, false).unwrap().is_empty());
6900
6901        fs::remove_dir_all(root).unwrap();
6902    }
6903
6904    #[test]
6905    fn search_retries_after_healing_stale_hit() {
6906        let (root, config) = markdown_config("# Title\nalpha token\n");
6907        let db = IndexDatabase::rebuild(&config).unwrap();
6908        fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
6909
6910        let hits = db.search("alpha", 10, false).unwrap();
6911        assert!(hits.is_empty());
6912        let beta_hits = db.search("beta", 10, false).unwrap();
6913        assert_eq!(beta_hits.len(), 1);
6914        assert!(beta_hits[0].summary.contains("beta"));
6915
6916        fs::remove_dir_all(root).unwrap();
6917    }
6918
6919    #[test]
6920    fn search_heals_relocated_hits_before_returning_line_spans() {
6921        let (root, config) = markdown_config("# Title\nalpha token\n");
6922        let db = IndexDatabase::rebuild(&config).unwrap();
6923        fs::write(root.join("docs/search.md"), "inserted\n# Title\nalpha token\n").unwrap();
6924
6925        let hits = db.search("alpha", 10, false).unwrap();
6926        assert_eq!(hits.len(), 1);
6927        assert_eq!(hits[0].start_line, 2);
6928        assert_eq!(hits[0].end_line, 3);
6929        assert!(hits[0].summary.contains("alpha"));
6930
6931        fs::remove_dir_all(root).unwrap();
6932    }
6933
6934    #[test]
6935    fn read_chunk_deleted_source_reports_gone() {
6936        let (root, config) = markdown_config("# Title\nalpha token\n");
6937        let db = IndexDatabase::rebuild(&config).unwrap();
6938        let chunk_id = first_chunk_id(&db);
6939        fs::remove_file(root.join("docs/search.md")).unwrap();
6940
6941        let err = db.read_chunk(chunk_id).unwrap_err().to_string();
6942        assert!(err.contains("Gone"), "{err}");
6943        assert!(db.search("alpha", 10, false).unwrap().is_empty());
6944
6945        fs::remove_dir_all(root).unwrap();
6946    }
6947
6948    #[test]
6949    fn search_returns_needs_reindex_when_heal_cap_is_exceeded() {
6950        let root = unique_temp_root();
6951        let _ = fs::remove_dir_all(&root);
6952        let docs = root.join("docs");
6953        fs::create_dir_all(&docs).unwrap();
6954        for index in 0..=MAX_AUTO_HEAL_FILES_PER_CALL {
6955            fs::write(docs.join(format!("doc-{index}.md")), "common stale token\n").unwrap();
6956        }
6957        let config = markdown_config_for_root(root.clone());
6958        let db = IndexDatabase::rebuild(&config).unwrap();
6959        for index in 0..=MAX_AUTO_HEAL_FILES_PER_CALL {
6960            fs::write(docs.join(format!("doc-{index}.md")), "fresh replacement token\n").unwrap();
6961        }
6962
6963        let err = db.search("common", 20, false).unwrap_err().to_string();
6964        assert!(err.contains("needs_reindex"), "{err}");
6965
6966        fs::remove_dir_all(root).unwrap();
6967    }
6968
6969    #[test]
6970    fn heal_index_limit_does_not_warn_when_only_fresh_files_are_skipped() {
6971        let root = unique_temp_root();
6972        let _ = fs::remove_dir_all(&root);
6973        let docs = root.join("docs");
6974        fs::create_dir_all(&docs).unwrap();
6975        fs::write(docs.join("one.md"), "one fresh token\n").unwrap();
6976        fs::write(docs.join("two.md"), "two fresh token\n").unwrap();
6977        let config = markdown_config_for_root(root.clone());
6978        let db = IndexDatabase::rebuild(&config).unwrap();
6979
6980        let report = db.heal_index(Some(1)).unwrap();
6981
6982        assert_eq!(report.healed_files, 0);
6983        assert_eq!(report.removed_files, 0);
6984        assert_eq!(report.skipped_files, 2);
6985        assert_eq!(report.message, None);
6986
6987        fs::remove_dir_all(root).unwrap();
6988    }
6989
6990    #[test]
6991    fn search_recovers_when_fts_revision_is_stale() {
6992        let (root, config) = markdown_config("alpha token");
6993        let db = IndexDatabase::rebuild(&config).unwrap();
6994        db.set_meta("fts_source_revision", "stale").unwrap();
6995
6996        let stale = db.status(&config.database).unwrap();
6997        assert!(!stale.fts_dirty);
6998        assert!(!stale.fts_fresh);
6999
7000        let hits = db.search("alpha", 10, false).unwrap();
7001        assert_eq!(hits.len(), 1);
7002        let fresh = db.status(&config.database).unwrap();
7003        assert_eq!(fresh.fts_source_revision.as_deref(), Some(fresh.content_revision.as_str()));
7004        assert!(fresh.fts_fresh);
7005
7006        fs::remove_dir_all(root).unwrap();
7007    }
7008
7009    #[test]
7010    fn parser_failures_report_paths() {
7011        let root = unique_temp_root();
7012        let _ = fs::remove_dir_all(&root);
7013        let src = root.join("src");
7014        fs::create_dir_all(&src).unwrap();
7015        fs::write(src.join("broken.rs"), "pub fn broken(").unwrap();
7016        let config = Config {
7017            root: root.clone(),
7018            database: root.join(".rag-rat/index.sqlite"),
7019            targets: vec![ResolvedTarget {
7020                name: "rust".to_string(),
7021                language: Language::Rust,
7022                directories: vec![PathBuf::from("src")],
7023                include: vec!["**/*.rs".to_string()],
7024                exclude: Vec::new(),
7025                kind: TargetKind::Source,
7026            }],
7027            local_ai: Default::default(),
7028            watch: Default::default(),
7029        };
7030
7031        let db = IndexDatabase::rebuild(&config).unwrap();
7032        let status = db.status(&config.database).unwrap();
7033        assert_eq!(status.parser_failures, 1);
7034        assert_eq!(status.parser_failure_paths[0].path, "src/broken.rs");
7035
7036        fs::remove_dir_all(root).unwrap();
7037    }
7038
7039    #[test]
7040    fn repo_memory_bound_to_logical_symbol_surfaces_in_symbol_chunk_and_impact() {
7041        let root = unique_temp_root();
7042        let _ = fs::remove_dir_all(&root);
7043        fs::create_dir_all(root.join("src")).unwrap();
7044        fs::write(
7045            root.join("src/lib.rs"),
7046            "#[cfg(unix)]\npub fn cfg_helper() {}\n#[cfg(windows)]\npub fn cfg_helper() {}\n",
7047        )
7048        .unwrap();
7049        let config = source_config(root.clone(), Language::Rust);
7050        let db = IndexDatabase::rebuild(&config).unwrap();
7051        let symbol = db
7052            .select_symbol(&crate::query::symbol::SymbolSelector {
7053                logical_symbol_id: None,
7054                symbol_id: None,
7055                symbol_path: None,
7056                symbol: Some("cfg_helper".to_string()),
7057                language: Some(Language::Rust),
7058                allow_ambiguous: true,
7059                limit: 10,
7060            })
7061            .unwrap()
7062            .unwrap()
7063            .expect("selected symbol");
7064        let logical_symbol_id = symbol.logical_symbol_id.expect("logical symbol id");
7065
7066        let created = db
7067            .memory_create(crate::query::memory::RepoMemoryCreate {
7068                kind: "Invariant".to_string(),
7069                title: "Treat cfg helper variants as one logical helper".to_string(),
7070                body: "Caller and impact analysis should use the logical symbol, not one cfg body variant."
7071                    .to_string(),
7072                confidence: "high".to_string(),
7073                created_by: Some("test-agent".to_string()),
7074                source: Some("agent".to_string()),
7075                tags: vec!["cfg".to_string(), "graph".to_string()],
7076                bind: crate::query::memory::RepoMemoryBindTarget {
7077                    logical_symbol_id: Some(logical_symbol_id),
7078                    symbol_id: None,
7079                    chunk_id: None,
7080                    edge_id: None,
7081                    path: None,
7082                    start_line: None,
7083                    end_line: None,
7084                    commit_hash: None,
7085                    github_owner: None,
7086                    github_repo: None,
7087                    github_number: None,
7088                    start_logical_symbol_id: None,
7089                    end_logical_symbol_id: None,
7090                    edge_sequence_hash: None,
7091                    path_summary: None,
7092                },
7093            })
7094            .unwrap();
7095        assert!(!created.duplicate);
7096        assert_eq!(created.memory.bindings[0].binding_kind, "logical_symbol");
7097
7098        let memories = db.memory_for_symbol(&symbol, 10).unwrap();
7099        assert_eq!(memories.len(), 1);
7100        assert_eq!(memories[0].kind, "Invariant");
7101        let chunk_id = memories[0].bindings[0].chunk_id.expect("bound chunk");
7102        let chunk = db.read_chunk(chunk_id).unwrap().expect("memory chunk");
7103        assert_eq!(chunk.memories.len(), 1);
7104        assert_eq!(chunk.memories[0].memory_id, created.memory.memory_id);
7105
7106        let impact = db
7107            .impact_surface_report_for_selected_symbol(
7108                &symbol,
7109                10,
7110                &crate::query::impact::ImpactSurfaceOptions::default(),
7111            )
7112            .unwrap();
7113        assert_eq!(impact.repo_memories.direct.len(), 1);
7114        assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);
7115        assert_eq!(impact.completeness_and_caveats.memory_status.stale, 0);
7116
7117        fs::remove_dir_all(root).unwrap();
7118    }
7119
7120    #[test]
7121    fn repo_memory_survives_reindex_and_relocates_when_symbol_moves() {
7122        // The user-facing guarantee: a memory is never lost to reindexing (no FK cascade from
7123        // symbols/chunks), and a symbol binding re-anchors to the symbol's new location when the
7124        // file is edited/moved rather than going stale.
7125        let root = unique_temp_root();
7126        let _ = fs::remove_dir_all(&root);
7127        fs::create_dir_all(root.join("src")).unwrap();
7128        fs::write(root.join("src/lib.rs"), "pub fn keystone() {}\n").unwrap();
7129        let config = source_config(root.clone(), Language::Rust);
7130        let db = IndexDatabase::rebuild(&config).unwrap();
7131
7132        let selector = crate::query::symbol::SymbolSelector {
7133            logical_symbol_id: None,
7134            symbol_id: None,
7135            symbol_path: None,
7136            symbol: Some("keystone".to_string()),
7137            language: Some(Language::Rust),
7138            allow_ambiguous: false,
7139            limit: 10,
7140        };
7141        let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("symbol");
7142        let created = db
7143            .memory_create(crate::query::memory::RepoMemoryCreate {
7144                kind: "Invariant".to_string(),
7145                title: "keystone holds an invariant".to_string(),
7146                body: "This memory must survive a reindex and follow the symbol when it moves."
7147                    .to_string(),
7148                confidence: "high".to_string(),
7149                created_by: Some("test".to_string()),
7150                source: Some("agent".to_string()),
7151                tags: Vec::new(),
7152                bind: crate::query::memory::RepoMemoryBindTarget {
7153                    symbol_id: Some(symbol.symbol_id),
7154                    logical_symbol_id: None,
7155                    chunk_id: None,
7156                    edge_id: None,
7157                    path: None,
7158                    start_line: None,
7159                    end_line: None,
7160                    commit_hash: None,
7161                    github_owner: None,
7162                    github_repo: None,
7163                    github_number: None,
7164                    start_logical_symbol_id: None,
7165                    end_logical_symbol_id: None,
7166                    edge_sequence_hash: None,
7167                    path_summary: None,
7168                },
7169            })
7170            .unwrap();
7171
7172        // Edit the file so keystone moves down (new symbol ids on reindex), then rebuild.
7173        fs::write(root.join("src/lib.rs"), "pub fn added_above() {}\n\npub fn keystone() {}\n")
7174            .unwrap();
7175        let db = IndexDatabase::rebuild(&config).unwrap();
7176
7177        // Memory row survives the reindex (no cascade from deleted symbols).
7178        assert!(
7179            crate::query::memory::memory_by_id(db.storage.connection(), &created.memory.memory_id,)
7180                .unwrap()
7181                .is_some(),
7182            "memory was lost to reindex",
7183        );
7184
7185        // Re-validation re-anchors the binding to keystone's new location, not "gone".
7186        db.memory_validate().unwrap();
7187        let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("symbol after move");
7188        let anchored = db.memory_for_symbol(&symbol, 10).unwrap();
7189        assert_eq!(anchored.len(), 1, "memory did not re-anchor to moved symbol");
7190        assert_ne!(anchored[0].bindings[0].anchor_status, "gone");
7191
7192        fs::remove_dir_all(root).unwrap();
7193    }
7194
7195    #[test]
7196    fn repo_memory_validate_marks_changed_or_missing_anchors_non_current() {
7197        let root = unique_temp_root();
7198        let _ = fs::remove_dir_all(&root);
7199        fs::create_dir_all(root.join("src")).unwrap();
7200        fs::write(root.join("src/lib.rs"), "pub fn anchored_memory() {}\n").unwrap();
7201        let config = source_config(root.clone(), Language::Rust);
7202        let db = IndexDatabase::rebuild(&config).unwrap();
7203        let symbol = db
7204            .select_symbol(&crate::query::symbol::SymbolSelector {
7205                logical_symbol_id: None,
7206                symbol_id: None,
7207                symbol_path: None,
7208                symbol: Some("anchored_memory".to_string()),
7209                language: Some(Language::Rust),
7210                allow_ambiguous: false,
7211                limit: 10,
7212            })
7213            .unwrap()
7214            .unwrap()
7215            .expect("selected symbol");
7216        let chunk_id = db
7217            .storage
7218            .connection()
7219            .query_row(
7220                "
7221                SELECT chunks.id
7222                FROM chunks
7223                JOIN files ON files.id = chunks.file_id
7224                WHERE files.path = ?1 AND chunks.symbol_path = ?2
7225                LIMIT 1
7226                ",
7227                params![symbol.path, symbol.qualified_name],
7228                |row| row.get::<_, i64>(0),
7229            )
7230            .unwrap();
7231        let created = db
7232            .memory_create(crate::query::memory::RepoMemoryCreate {
7233                kind: "Risk".to_string(),
7234                title: "Anchor must become stale when source hash changes".to_string(),
7235                body: "Validation should separate stale memories from current repo evidence."
7236                    .to_string(),
7237                confidence: "medium".to_string(),
7238                created_by: Some("test-agent".to_string()),
7239                source: Some("agent".to_string()),
7240                tags: Vec::new(),
7241                bind: crate::query::memory::RepoMemoryBindTarget {
7242                    logical_symbol_id: None,
7243                    symbol_id: None,
7244                    chunk_id: Some(chunk_id),
7245                    edge_id: None,
7246                    path: None,
7247                    start_line: None,
7248                    end_line: None,
7249                    commit_hash: None,
7250                    github_owner: None,
7251                    github_repo: None,
7252                    github_number: None,
7253                    start_logical_symbol_id: None,
7254                    end_logical_symbol_id: None,
7255                    edge_sequence_hash: None,
7256                    path_summary: None,
7257                },
7258            })
7259            .unwrap();
7260
7261        db.storage
7262            .connection()
7263            .execute("UPDATE chunks SET text_hash = 'changed' WHERE id = ?1", [chunk_id])
7264            .unwrap();
7265        let report = db.memory_validate().unwrap();
7266        assert_eq!(report.stale, 1);
7267        let stale = db.memory_for_symbol(&symbol, 10).unwrap();
7268        assert_eq!(stale[0].memory_id, created.memory.memory_id);
7269        assert_eq!(stale[0].bindings[0].anchor_status, "stale");
7270
7271        db.storage.connection().execute("DELETE FROM chunks WHERE id = ?1", [chunk_id]).unwrap();
7272        let report = db.memory_validate().unwrap();
7273        assert_eq!(report.gone, 1);
7274        let gone = db.memory_for_symbol(&symbol, 10).unwrap();
7275        assert_eq!(gone[0].bindings[0].anchor_status, "gone");
7276
7277        fs::remove_dir_all(root).unwrap();
7278    }
7279
7280    #[test]
7281    fn repo_memory_bound_to_edge_surfaces_when_impact_crosses_call_path() {
7282        let root = unique_temp_root();
7283        let _ = fs::remove_dir_all(&root);
7284        fs::create_dir_all(root.join("src")).unwrap();
7285        fs::write(
7286            root.join("src/lib.rs"),
7287            "pub fn target_edge() {}\npub fn caller_edge() {\n    target_edge();\n}\n",
7288        )
7289        .unwrap();
7290        let config = source_config(root.clone(), Language::Rust);
7291        let db = IndexDatabase::rebuild(&config).unwrap();
7292        let target = db
7293            .select_symbol(&crate::query::symbol::SymbolSelector {
7294                logical_symbol_id: None,
7295                symbol_id: None,
7296                symbol_path: None,
7297                symbol: Some("target_edge".to_string()),
7298                language: Some(Language::Rust),
7299                allow_ambiguous: false,
7300                limit: 10,
7301            })
7302            .unwrap()
7303            .unwrap()
7304            .expect("selected target");
7305        let graph_options = crate::query::graph::GraphTraversalOptions {
7306            resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
7307            symbol_id: Some(target.symbol_id),
7308            logical_symbol_id: target.logical_symbol_id,
7309            ..Default::default()
7310        };
7311        let callers =
7312            db.graph_traversal_report("find_callers", &target, true, 10, &graph_options).unwrap();
7313        let edge_id = callers.results[0].edge_id;
7314
7315        let edge_memory = db
7316            .memory_create(crate::query::memory::RepoMemoryCreate {
7317                kind: "Risk".to_string(),
7318                title: "caller_edge to target_edge must stay synchronous".to_string(),
7319                body: "This specific call path is used to prove edge-bound memories surface when impact crosses the edge."
7320                    .to_string(),
7321                confidence: "high".to_string(),
7322                created_by: Some("test-agent".to_string()),
7323                source: Some("agent".to_string()),
7324                tags: vec!["edge".to_string()],
7325                bind: crate::query::memory::RepoMemoryBindTarget {
7326                    logical_symbol_id: None,
7327                    symbol_id: None,
7328                    chunk_id: None,
7329                    edge_id: Some(edge_id),
7330                    path: None,
7331                    start_line: None,
7332                    end_line: None,
7333                    commit_hash: None,
7334                    github_owner: None,
7335                    github_repo: None,
7336                    github_number: None,
7337                    start_logical_symbol_id: None,
7338                    end_logical_symbol_id: None,
7339                    edge_sequence_hash: None,
7340                    path_summary: None,
7341                },
7342            })
7343            .unwrap();
7344        assert_eq!(edge_memory.memory.bindings[0].binding_kind, "edge");
7345        assert_eq!(edge_memory.memory.bindings[0].edge_id, Some(edge_id));
7346
7347        let impact = db
7348            .impact_surface_report_for_selected_symbol(
7349                &target,
7350                10,
7351                &crate::query::impact::ImpactSurfaceOptions {
7352                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
7353                    ..Default::default()
7354                },
7355            )
7356            .unwrap();
7357        assert!(impact.repo_memories.direct.is_empty());
7358        assert_eq!(impact.repo_memories.path_crossed.len(), 1);
7359        assert_eq!(impact.repo_memories.path_crossed[0].memory_id, edge_memory.memory.memory_id);
7360        assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);
7361
7362        let call_path_memory = db
7363            .memory_create(crate::query::memory::RepoMemoryCreate {
7364                kind: "TestExpectation".to_string(),
7365                title: "caller_edge path hash recall".to_string(),
7366                body: "Call-path memories are addressable by a deterministic edge sequence hash."
7367                    .to_string(),
7368                confidence: "medium".to_string(),
7369                created_by: Some("test-agent".to_string()),
7370                source: Some("agent".to_string()),
7371                tags: vec!["call-path".to_string()],
7372                bind: crate::query::memory::RepoMemoryBindTarget {
7373                    logical_symbol_id: None,
7374                    symbol_id: None,
7375                    chunk_id: None,
7376                    edge_id: None,
7377                    path: None,
7378                    start_line: None,
7379                    end_line: None,
7380                    commit_hash: None,
7381                    github_owner: None,
7382                    github_repo: None,
7383                    github_number: None,
7384                    start_logical_symbol_id: target.logical_symbol_id,
7385                    end_logical_symbol_id: target.logical_symbol_id,
7386                    edge_sequence_hash: Some("edge-sequence-test-hash".to_string()),
7387                    path_summary: Some("caller_edge -> target_edge".to_string()),
7388                },
7389            })
7390            .unwrap();
7391        let call_path = db.memory_for_call_path_hash("edge-sequence-test-hash", 10).unwrap();
7392        assert_eq!(call_path.len(), 1);
7393        assert_eq!(call_path[0].memory_id, call_path_memory.memory.memory_id);
7394        assert_eq!(call_path[0].call_paths[0].path_summary, "caller_edge -> target_edge");
7395
7396        fs::remove_dir_all(root).unwrap();
7397    }
7398
7399    #[test]
7400    fn repo_brief_ranks_churn_and_god_module_candidates() {
7401        let root = unique_temp_root();
7402        let _ = fs::remove_dir_all(&root);
7403        fs::create_dir_all(root.join("src")).unwrap();
7404        run_git(&root, &["init"]);
7405        run_git(&root, &["config", "user.name", "Rag Rat"]);
7406        run_git(&root, &["config", "user.email", "rag@example.com"]);
7407
7408        fs::write(root.join("src/stable.rs"), "pub fn stable() -> i32 { 1 }\n").unwrap();
7409        fs::write(root.join("src/hot.rs"), hot_module_text(0)).unwrap();
7410        run_git(&root, &["add", "."]);
7411        run_git(&root, &["commit", "-m", "Add initial modules"]);
7412
7413        for revision in 1..=3 {
7414            fs::write(root.join("src/hot.rs"), hot_module_text(revision)).unwrap();
7415            run_git(&root, &["add", "src/hot.rs"]);
7416            run_git(&root, &["commit", "-m", "Iterate hot module"]);
7417        }
7418
7419        let config = Config {
7420            root: root.clone(),
7421            database: root.join(".rag-rat/index.sqlite"),
7422            targets: vec![ResolvedTarget {
7423                name: "rust".to_string(),
7424                language: Language::Rust,
7425                directories: vec![PathBuf::from("src")],
7426                include: vec!["**/*.rs".to_string()],
7427                exclude: Vec::new(),
7428                kind: TargetKind::Source,
7429            }],
7430            local_ai: Default::default(),
7431            watch: Default::default(),
7432        };
7433        let db = IndexDatabase::rebuild(&config).unwrap();
7434
7435        let churn = db
7436            .repo_brief(crate::query::repo_brief::RepoBriefOptions {
7437                mode: crate::query::repo_brief::RepoBriefMode::Churn,
7438                limit: 1,
7439                include_generated: false,
7440                include_memories: true,
7441            })
7442            .unwrap();
7443        assert_eq!(churn.candidates[0].path, "src/hot.rs");
7444        assert_eq!(churn.candidates[0].category, "recent_churn_hotspot");
7445        assert!(churn.candidates[0].score <= 1.0);
7446        assert!(churn.candidates[0].metrics.commit_touch_count >= 4);
7447        assert!(churn.candidates[0].why.iter().any(|reason| reason.contains("churn")));
7448
7449        let god_modules = db
7450            .repo_brief(crate::query::repo_brief::RepoBriefOptions {
7451                mode: crate::query::repo_brief::RepoBriefMode::GodModules,
7452                limit: 1,
7453                include_generated: false,
7454                include_memories: true,
7455            })
7456            .unwrap();
7457        assert_eq!(god_modules.candidates[0].path, "src/hot.rs");
7458        assert!(god_modules.candidates[0].score <= 1.0);
7459        assert!(god_modules.candidates[0].metrics.symbol_count >= 30);
7460        assert!(!god_modules.candidates[0].split_hints.is_empty());
7461        assert!(
7462            god_modules.candidates[0].next_tools.iter().any(|tool| tool.tool == "impact_surface")
7463        );
7464
7465        fs::remove_dir_all(root).unwrap();
7466    }
7467
7468    #[test]
7469    fn repo_clusters_groups_cotouched_files() {
7470        let root = unique_temp_root();
7471        let _ = fs::remove_dir_all(&root);
7472        fs::create_dir_all(root.join("src/sync")).unwrap();
7473        fs::create_dir_all(root.join("src/ui")).unwrap();
7474        run_git(&root, &["init"]);
7475        run_git(&root, &["config", "user.name", "Rag Rat"]);
7476        run_git(&root, &["config", "user.email", "rag@example.com"]);
7477
7478        fs::write(root.join("src/sync/actor.rs"), "pub fn sync_actor() -> i32 { 1 }\n").unwrap();
7479        fs::write(root.join("src/sync/msg.rs"), "pub fn sync_msg() -> i32 { 2 }\n").unwrap();
7480        fs::write(root.join("src/ui/app.rs"), "pub fn ui_app() -> i32 { 3 }\n").unwrap();
7481        run_git(&root, &["add", "."]);
7482        run_git(&root, &["commit", "-m", "Add modules"]);
7483
7484        for revision in 1..=2 {
7485            fs::write(
7486                root.join("src/sync/actor.rs"),
7487                format!("pub fn sync_actor() -> i32 {{ {revision} }}\n"),
7488            )
7489            .unwrap();
7490            fs::write(
7491                root.join("src/sync/msg.rs"),
7492                format!("pub fn sync_msg() -> i32 {{ {} }}\n", revision + 10),
7493            )
7494            .unwrap();
7495            run_git(&root, &["add", "src/sync/actor.rs", "src/sync/msg.rs"]);
7496            run_git(&root, &["commit", "-m", "Iterate sync modules"]);
7497        }
7498
7499        let config = Config {
7500            root: root.clone(),
7501            database: root.join(".rag-rat/index.sqlite"),
7502            targets: vec![ResolvedTarget {
7503                name: "rust".to_string(),
7504                language: Language::Rust,
7505                directories: vec![PathBuf::from("src")],
7506                include: vec!["**/*.rs".to_string()],
7507                exclude: Vec::new(),
7508                kind: TargetKind::Source,
7509            }],
7510            local_ai: Default::default(),
7511            watch: Default::default(),
7512        };
7513        let db = IndexDatabase::rebuild(&config).unwrap();
7514
7515        let clusters = db
7516            .repo_clusters(crate::query::clusters::RepoClustersOptions {
7517                limit: 5,
7518                include_generated: false,
7519                include_memories: true,
7520                min_cluster_size: 2,
7521            })
7522            .unwrap();
7523
7524        let sync_cluster = clusters
7525            .clusters
7526            .iter()
7527            .find(|cluster| cluster.name == "src/sync")
7528            .expect("sync cluster");
7529        assert!(sync_cluster.representative_paths.contains(&"src/sync/actor.rs".to_string()));
7530        assert!(sync_cluster.representative_paths.contains(&"src/sync/msg.rs".to_string()));
7531        assert!(sync_cluster.metrics.co_touch_edges >= 2);
7532
7533        fs::remove_dir_all(root).unwrap();
7534    }
7535
7536    fn hot_module_text(revision: usize) -> String {
7537        let mut text = String::new();
7538        text.push_str("pub fn entry() -> i32 {\n");
7539        for i in 0..32 {
7540            text.push_str(&format!("    helper_{i}() +\n"));
7541        }
7542        text.push_str(&format!("    {revision}\n}}\n"));
7543        for i in 0..32 {
7544            text.push_str(&format!("pub fn helper_{i}() -> i32 {{ {i} }}\n"));
7545        }
7546        text
7547    }
7548
7549    fn unique_temp_root() -> PathBuf {
7550        let mut root = std::env::temp_dir();
7551        let suffix = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
7552        root.push(format!("rag-rat-schema-test-{}-{}-{suffix}", std::process::id(), now_ms()));
7553        root
7554    }
7555
7556    fn fixture_temp_root(fixture: &str) -> PathBuf {
7557        let root = unique_temp_root();
7558        let _ = fs::remove_dir_all(&root);
7559        let fixture_root =
7560            PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/fixtures").join(fixture);
7561        copy_fixture_dir(&fixture_root, &root);
7562        root
7563    }
7564
7565    fn copy_fixture_dir(from: &Path, to: &Path) {
7566        fs::create_dir_all(to).unwrap();
7567        for entry in fs::read_dir(from).unwrap() {
7568            let entry = entry.unwrap();
7569            let from_path = entry.path();
7570            let to_path = to.join(entry.file_name());
7571            if from_path.is_dir() {
7572                copy_fixture_dir(&from_path, &to_path);
7573            } else {
7574                fs::copy(&from_path, &to_path).unwrap();
7575            }
7576        }
7577    }
7578
7579    fn markdown_config(text: &str) -> (PathBuf, Config) {
7580        let root = unique_temp_root();
7581        let _ = fs::remove_dir_all(&root);
7582        let docs = root.join("docs");
7583        fs::create_dir_all(&docs).unwrap();
7584        fs::write(docs.join("search.md"), text).unwrap();
7585        let config = markdown_config_for_root(root.clone());
7586        (root, config)
7587    }
7588
7589    fn markdown_config_for_root(root: PathBuf) -> Config {
7590        Config {
7591            root: root.clone(),
7592            database: root.join(".rag-rat/index.sqlite"),
7593            targets: vec![ResolvedTarget {
7594                name: "markdown".to_string(),
7595                language: Language::Markdown,
7596                directories: vec![PathBuf::from("docs")],
7597                include: vec!["**/*.md".to_string()],
7598                exclude: Vec::new(),
7599                kind: TargetKind::Docs,
7600            }],
7601            local_ai: Default::default(),
7602            watch: Default::default(),
7603        }
7604    }
7605
7606    fn source_config(root: PathBuf, language: Language) -> Config {
7607        Config {
7608            root: root.clone(),
7609            database: root.join(".rag-rat/index.sqlite"),
7610            targets: vec![ResolvedTarget {
7611                name: language.as_str().to_string(),
7612                language,
7613                directories: vec![PathBuf::from("src")],
7614                include: vec!["src/".to_string()],
7615                exclude: Vec::new(),
7616                kind: TargetKind::Source,
7617            }],
7618            local_ai: Default::default(),
7619            watch: Default::default(),
7620        }
7621    }
7622
7623    fn assert_edge(db: &IndexDatabase, from: &str, to: &str, edge_kind: &str, confidence: &str) {
7624        let count = db
7625            .storage
7626            .connection()
7627            .query_row(
7628                "
7629                SELECT COUNT(*)
7630                FROM edges
7631                WHERE edge_kind = ?1
7632                  AND confidence = ?2
7633                  AND COALESCE(from_name, '') LIKE ?3
7634                  AND to_name LIKE ?4
7635                ",
7636                params![edge_kind, confidence, format!("%{from}%"), format!("%{to}%")],
7637                |row| row.get::<_, i64>(0),
7638            )
7639            .unwrap();
7640        assert!(count > 0, "missing edge {from} -[{edge_kind}/{confidence}]-> {to}");
7641    }
7642
7643    #[test]
7644    fn rebuild_restores_durable_wal_after_bulk_build() {
7645        // The bulk rebuild drops to journal_mode=MEMORY + synchronous=OFF for speed; it MUST
7646        // restore durable WAL/NORMAL afterward so later writes (reconcile, the watcher) are safe.
7647        let root = unique_temp_root();
7648        let _ = fs::remove_dir_all(&root);
7649        fs::create_dir_all(root.join("src")).unwrap();
7650        fs::write(root.join("src/lib.rs"), "pub fn alpha() {}\npub fn beta() {}\n").unwrap();
7651        let config = source_config(root.clone(), Language::Rust);
7652        let db = IndexDatabase::rebuild(&config).unwrap();
7653
7654        let journal_mode: String = db
7655            .storage
7656            .connection()
7657            .query_row("PRAGMA journal_mode", [], |row| row.get(0))
7658            .unwrap();
7659        assert_eq!(journal_mode.to_lowercase(), "wal", "rebuild must restore WAL durability");
7660        let synchronous: i64 = db
7661            .storage
7662            .connection()
7663            .query_row("PRAGMA synchronous", [], |row| row.get(0))
7664            .unwrap();
7665        assert_eq!(synchronous, 1, "synchronous must be restored to NORMAL (=1)");
7666        // The index is intact and queryable after the bulk build.
7667        assert!(!db.symbols("alpha", Some(Language::Rust), 10).unwrap().is_empty());
7668
7669        fs::remove_dir_all(root).unwrap();
7670    }
7671
7672    fn table_count(db: &IndexDatabase, table: &str) -> i64 {
7673        db.storage
7674            .connection()
7675            .query_row("SELECT COUNT(*) FROM sqlite_master WHERE name = ?1", [table], |row| {
7676                row.get(0)
7677            })
7678            .unwrap()
7679    }
7680
7681    fn row_count(db: &IndexDatabase, table: &str) -> i64 {
7682        db.storage
7683            .connection()
7684            .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| row.get(0))
7685            .unwrap()
7686    }
7687
    /// Returns the column names of the `chunks` table (delegates to `table_columns`).
    fn chunk_columns(db: &IndexDatabase) -> Vec<String> {
        table_columns(db, "chunks")
    }
7691
    /// Returns the column names of the `files` table (delegates to `table_columns`).
    fn file_columns(db: &IndexDatabase) -> Vec<String> {
        table_columns(db, "files")
    }
7695
7696    fn table_columns(db: &IndexDatabase, table: &str) -> Vec<String> {
7697        let mut stmt =
7698            db.storage.connection().prepare(&format!("PRAGMA table_info({table})")).unwrap();
7699        stmt.query_map([], |row| row.get::<_, String>(1)).unwrap().map(Result::unwrap).collect()
7700    }
7701
7702    fn indexed_revision_count(db: &IndexDatabase) -> i64 {
7703        db.storage
7704            .connection()
7705            .query_row("SELECT COUNT(*) FROM files WHERE indexed_revision != ''", [], |row| {
7706                row.get(0)
7707            })
7708            .unwrap()
7709    }
7710
7711    fn chunk_source_revision_count(db: &IndexDatabase) -> i64 {
7712        db.storage
7713            .connection()
7714            .query_row("SELECT COUNT(*) FROM chunks WHERE source_revision != ''", [], |row| {
7715                row.get(0)
7716            })
7717            .unwrap()
7718    }
7719
7720    fn first_chunk_id(db: &IndexDatabase) -> i64 {
7721        db.storage
7722            .connection()
7723            .query_row("SELECT id FROM chunks ORDER BY id LIMIT 1", [], |row| row.get(0))
7724            .unwrap()
7725    }
7726
7727    fn run_git(root: &Path, args: &[&str]) {
7728        let output = Command::new("git").args(args).current_dir(root).output().unwrap();
7729        assert!(
7730            output.status.success(),
7731            "git {:?} failed\nstdout:\n{}\nstderr:\n{}",
7732            args,
7733            String::from_utf8_lossy(&output.stdout),
7734            String::from_utf8_lossy(&output.stderr)
7735        );
7736    }
7737
7738    struct MockGitHubClient;
7739
7740    impl github::GitHubClient for MockGitHubClient {
7741        fn issue(
7742            &self,
7743            owner: &str,
7744            repo: &str,
7745            number: i64,
7746        ) -> anyhow::Result<github::GitHubIssue> {
7747            Ok(github::GitHubIssue {
7748                owner: owner.to_string(),
7749                repo: repo.to_string(),
7750                number,
7751                html_url: format!("https://github.com/{owner}/{repo}/issues/{number}"),
7752                state: "open".to_string(),
7753                title: "Decision: keep sqlite".to_string(),
7754                body: "We decided sqlite is required for binary size.".to_string(),
7755                author: Some("octo".to_string()),
7756                created_at: Some("2026-01-01T00:00:00Z".to_string()),
7757                updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7758                is_pull_request: true,
7759            })
7760        }
7761
7762        fn issue_comments(
7763            &self,
7764            owner: &str,
7765            repo: &str,
7766            number: i64,
7767        ) -> anyhow::Result<Vec<github::GitHubComment>> {
7768            Ok(vec![github::GitHubComment {
7769                id: 4201,
7770                owner: owner.to_string(),
7771                repo: repo.to_string(),
7772                number,
7773                html_url: format!("https://github.com/{owner}/{repo}/issues/{number}#comment-1"),
7774                body: "Rejected alternative: duckdb was too large.".to_string(),
7775                author: Some("octo".to_string()),
7776                created_at: Some("2026-01-01T01:00:00Z".to_string()),
7777                updated_at: Some("2026-01-01T01:00:00Z".to_string()),
7778            }])
7779        }
7780
7781        fn pull(
7782            &self,
7783            owner: &str,
7784            repo: &str,
7785            number: i64,
7786        ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7787            Ok(Some(github::GitHubPullRequest {
7788                owner: owner.to_string(),
7789                repo: repo.to_string(),
7790                number,
7791                html_url: format!("https://github.com/{owner}/{repo}/pull/{number}"),
7792                state: "open".to_string(),
7793                title: "Use sqlite".to_string(),
7794                body: "Constraint: normal queries must use cache only.".to_string(),
7795                author: Some("octo".to_string()),
7796                created_at: Some("2026-01-01T00:00:00Z".to_string()),
7797                updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7798                merged_at: None,
7799            }))
7800        }
7801
7802        fn pull_reviews(
7803            &self,
7804            owner: &str,
7805            repo: &str,
7806            number: i64,
7807        ) -> anyhow::Result<Vec<github::GitHubReview>> {
7808            Ok(vec![github::GitHubReview {
7809                id: 4202,
7810                owner: owner.to_string(),
7811                repo: repo.to_string(),
7812                number,
7813                html_url: Some(format!("https://github.com/{owner}/{repo}/pull/{number}#review")),
7814                state: "COMMENTED".to_string(),
7815                body: "Risk: live crawling during search would be surprising.".to_string(),
7816                author: Some("reviewer".to_string()),
7817                submitted_at: Some("2026-01-01T02:00:00Z".to_string()),
7818            }])
7819        }
7820
7821        fn pull_review_comments(
7822            &self,
7823            owner: &str,
7824            repo: &str,
7825            number: i64,
7826        ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7827            Ok(vec![github::GitHubReviewComment {
7828                id: 4203,
7829                owner: owner.to_string(),
7830                repo: repo.to_string(),
7831                number,
7832                path: Some("docs/search.md".to_string()),
7833                html_url: format!("https://github.com/{owner}/{repo}/pull/{number}#discussion"),
7834                body: "No longer use obsolete duckdb rationale.".to_string(),
7835                author: Some("reviewer".to_string()),
7836                created_at: Some("2026-01-01T03:00:00Z".to_string()),
7837                updated_at: Some("2026-01-01T03:00:00Z".to_string()),
7838            }])
7839        }
7840    }
7841
7842    struct PartiallyFailingGitHubClient;
7843
7844    impl github::GitHubClient for PartiallyFailingGitHubClient {
7845        fn issue(
7846            &self,
7847            owner: &str,
7848            repo: &str,
7849            number: i64,
7850        ) -> anyhow::Result<github::GitHubIssue> {
7851            if number == 404 {
7852                anyhow::bail!("gh: Not Found (HTTP 404)");
7853            }
7854            MockGitHubClient.issue(owner, repo, number)
7855        }
7856
7857        fn issue_comments(
7858            &self,
7859            owner: &str,
7860            repo: &str,
7861            number: i64,
7862        ) -> anyhow::Result<Vec<github::GitHubComment>> {
7863            MockGitHubClient.issue_comments(owner, repo, number)
7864        }
7865
7866        fn pull(
7867            &self,
7868            owner: &str,
7869            repo: &str,
7870            number: i64,
7871        ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7872            MockGitHubClient.pull(owner, repo, number)
7873        }
7874
7875        fn pull_reviews(
7876            &self,
7877            owner: &str,
7878            repo: &str,
7879            number: i64,
7880        ) -> anyhow::Result<Vec<github::GitHubReview>> {
7881            MockGitHubClient.pull_reviews(owner, repo, number)
7882        }
7883
7884        fn pull_review_comments(
7885            &self,
7886            owner: &str,
7887            repo: &str,
7888            number: i64,
7889        ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7890            MockGitHubClient.pull_review_comments(owner, repo, number)
7891        }
7892    }
7893}