// rag_rat_core/index/mod.rs

1pub mod ai;
2pub mod anchors;
3pub mod chunker;
4pub mod edges;
5pub mod git_history;
6pub mod github;
7pub mod parser;
8pub mod schema;
9pub mod symbols;
10pub mod walker;
11
12#[cfg(test)]
13mod anchor_tests;
14#[cfg(test)]
15mod parser_tests;
16
17use std::{
18    collections::{BTreeMap, BTreeSet},
19    fs,
20    path::{Path, PathBuf},
21    process::Command,
22    sync::{
23        atomic::{AtomicUsize, Ordering},
24        mpsc,
25    },
26    thread,
27    thread::JoinHandle,
28    time::{SystemTime, UNIX_EPOCH},
29};
30
31use gix::{
32    bstr::{BString, ByteSlice},
33    status::{UntrackedFiles, tree_index},
34};
35use rayon::prelude::*;
36use regex::Regex;
37use rusqlite::{OptionalExtension, params};
38use serde::Serialize;
39use sha2::{Digest, Sha256};
40use thiserror::Error;
41
42use crate::{
43    config::{Config, TargetKind},
44    index::{
45        ai::{LocalAiStatus, ModelInfo, ReconcilePlan, ReconcileReport},
46        anchors::{AnchorStatus, ChunkAnchor},
47        chunker::Chunk,
48        git_history::{
49            ChunkBlameSummary, CommitSearchHit, GitHistoryIndexStatus, PathHistoryItem,
50            QueryCommitHit, SymbolHistoryItem,
51        },
52        github::{GitHubEvidence, GitHubStatus, GitHubSyncReport, Papertrail},
53        symbols::Symbol,
54    },
55    language::Language,
56    query::graph_meta::{self, GraphMetaMode},
57    search::lexical::{SearchHit, SearchOptions},
58    storage::IndexConnection,
59    storage::StorageStatus,
60};
61
/// Handle to an index database: the storage connection plus the commit/worktree
/// context that the connection is currently scoped to.
#[derive(Debug)]
pub struct IndexDatabase {
    /// Connection wrapper through which every SQL operation on this handle goes.
    storage: IndexConnection,
    /// Commit SHA recorded by `set_context`; empty until a context has been set.
    pub active_commit_sha: String,
    /// Worktree id recorded by `set_context`; empty until a context has been set.
    pub active_worktree_id: String,
}
68
/// Progress events handed to the callback of the `*_with_progress` indexing entry points.
#[derive(Debug, Clone)]
pub enum IndexProgress {
    /// An indexing run against `database` has begun in the given `mode`.
    Started {
        database: PathBuf,
        mode: IndexMode,
    },
    /// File discovery is about to start.
    Discovering,
    /// Discovery finished; `files` is the number of files selected for indexing.
    Discovered {
        files: usize,
    },
    /// File `current` of `total` is being prepared.
    // NOTE(review): not emitted in this part of the file; presumably reported by
    // `prepare_files_with_progress` — confirm.
    PreparingFile {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    },
    /// File `current` of `total` is about to be written into the index.
    IndexingFile {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    },
    /// Reported right before the background git-history preparation is spawned.
    IndexingGitHistory,
    /// Reported right before logical symbols are rebuilt.
    RebuildingLogicalSymbols,
    /// Reported right before graph edges are resolved.
    ResolvingGraph,
    /// Reported right before the FTS index is synced (incremental runs).
    SyncingFts,
    /// Reported right before the FTS index is rebuilt (full rebuilds).
    RebuildingFts,
    /// The run committed; `files` is the count returned by the indexing step.
    Finished {
        files: usize,
    },
}
102
/// Which kind of indexing run is being performed. Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum IndexMode {
    /// Incremental run driven by the paths git reports as changed.
    Changed,
    /// Incremental run driven by a discovery plan computed against the index.
    Discover,
    /// Full rebuild of the index.
    Full,
}
110
111impl IndexMode {
112    pub fn label(self) -> &'static str {
113        match self {
114            Self::Changed => "changed files",
115            Self::Discover => "discovery",
116            Self::Full => "full rebuild",
117        }
118    }
119}
120
/// Snapshot of the index state as assembled by `IndexDatabase::status`.
#[derive(Debug, Serialize)]
pub struct IndexStatus {
    /// Display form of the database path that was passed to `status`.
    pub database: String,
    /// Whether that database path exists on disk.
    pub exists: bool,
    /// Schema status read from the open connection.
    pub schema: schema::SchemaStatus,
    /// Value of the `git_commit` meta entry, if present.
    pub git_commit: Option<String>,
    /// `true` when the `git_dirty` meta entry equals `"true"`; `None` when the entry is absent.
    pub git_dirty: Option<bool>,
    /// `indexed_at_ms` meta entry parsed as an integer; `None` when absent or unparsable.
    pub indexed_at_ms: Option<i64>,
    /// Current content revision of the index.
    pub content_revision: String,
    /// `fts_synced_at_ms` meta entry parsed as an integer; `None` when absent or unparsable.
    pub fts_synced_at_ms: Option<i64>,
    /// Value of the `fts_source_revision` meta entry, if present.
    pub fts_source_revision: Option<String>,
    /// Whether the FTS index is flagged dirty.
    pub fts_dirty: bool,
    /// `true` when the FTS index is not dirty and `fts_source_revision` equals `content_revision`.
    pub fts_fresh: bool,
    /// Number of rows in `files` per language.
    pub file_count_by_language: BTreeMap<String, u64>,
    /// Number of recorded parser failures.
    pub parser_failures: u64,
    /// Recorded parser failures.
    pub parser_failure_paths: Vec<ParserFailure>,
    /// Status of the git-history index.
    pub git_history: GitHistoryIndexStatus,
    /// Status of the GitHub data.
    pub github: GitHubStatus,
    /// Status of the local AI components.
    pub local_ai: LocalAiStatus,
}
141
/// Serializable summary of an index heal pass.
// NOTE(review): the code that fills this report is outside this part of the file; the
// per-field notes below are inferred from the field names — confirm against the producer.
#[derive(Debug, Serialize)]
pub struct HealIndexReport {
    /// Presumably the number of files examined.
    pub checked_files: u64,
    /// Presumably the number of files that were re-indexed.
    pub healed_files: u64,
    /// Presumably the number of files dropped from the index.
    pub removed_files: u64,
    /// Presumably the number of files left untouched.
    pub skipped_files: u64,
    /// Presumably whether the FTS index was fresh after the pass.
    pub fts_fresh: bool,
    /// Optional free-form message accompanying the report.
    pub message: Option<String>,
}
151
/// One recorded parser failure, as exposed through `IndexStatus::parser_failure_paths`.
#[derive(Debug, Serialize)]
pub struct ParserFailure {
    /// Path of the file that failed to parse.
    pub path: String,
    /// Language associated with the failure.
    pub language: String,
    /// Failure message.
    pub message: String,
}
158
/// Summary of a discovery plan, as produced by `IndexDatabase::discovery_status`.
#[derive(Debug, Serialize)]
pub struct DiscoveryStatus {
    /// `discovered_files` count taken from the discovery plan.
    pub discovered_files: usize,
    /// `indexed_files` count taken from the discovery plan.
    pub indexed_files: usize,
    /// Number of entries in the plan's `unindexed` list.
    pub unindexed_files: usize,
    /// Number of unindexed entries whose kind is `TargetKind::Source`.
    pub unindexed_source_files: usize,
    /// Number of entries in the plan's `changed` list.
    pub changed_indexed_files: usize,
    /// Number of entries in the plan's `deleted` set.
    pub removed_indexed_files: usize,
    /// Paths of at most the first ten unindexed entries.
    pub unindexed_sample: Vec<String>,
    /// Set when at least one unindexed source file exists; tells the user how to re-index.
    pub warning: Option<String>,
}
170
// NOTE(review): neither constant is used in this part of the file; the notes below are
// inferred from the names and from `IndexError::NeedsReindex` — confirm at the use sites.

/// Presumably the cap on how many stale files one call may heal automatically.
const MAX_AUTO_HEAL_FILES_PER_CALL: usize = 4;
/// Presumably the graph index version that stored data is compared against.
const GRAPH_INDEX_VERSION: &str = "6";
173
/// Typed errors raised by index operations; the `#[error]` strings are the display messages.
#[derive(Debug, Error)]
pub enum IndexError {
    /// The chunk with id `chunk_id` no longer exists in the index.
    #[error("Gone: indexed chunk {chunk_id} no longer exists")]
    Gone { chunk_id: i64 },
    /// The chunk `chunk_id` in `path` could not be relocated after a reindex.
    #[error("StaleChunk: chunk {chunk_id} in {path} could not be relocated after reindex")]
    StaleChunk { chunk_id: i64, path: String },
    /// `stale_files` exceeds the automatic heal cap `cap`.
    #[error("needs_reindex: {stale_files} stale files exceeds automatic heal cap {cap}")]
    NeedsReindex { stale_files: usize, cap: usize },
}
183
184impl IndexDatabase {
185    pub fn open(path: &Path) -> anyhow::Result<Self> {
186        Self::open_with_graph_check(path, true)
187    }
188
189    pub fn database_path(&self) -> &Path {
190        self.storage.database_path()
191    }
192
193    fn open_with_graph_check(path: &Path, check_graph: bool) -> anyhow::Result<Self> {
194        let mut storage = IndexConnection::open(path)?;
195        schema::check_compatible(storage.connection())?;
196        ai::ensure_model_manifest(storage.connection())?;
197        if let Some(root) = meta_for(storage.connection(), "source_root")? {
198            storage.set_source_root(PathBuf::from(root));
199        }
200        let db =
201            Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() };
202        if check_graph {
203            db.ensure_graph_index_current()?;
204        }
205        Ok(db)
206    }
207
208    pub fn open_config(config: &Config) -> anyhow::Result<Self> {
209        let mut db = Self::open_with_graph_check(&config.database, false)?;
210        db.storage.set_source_root(config.root.clone());
211        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
212        db.set_context(&commit_sha, &worktree_id)?;
213        db.ensure_graph_index_current()?;
214        Ok(db)
215    }
216
217    pub fn migrate(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
218        Self::migrate_with_fastembed_cache(path, None)
219    }
220
221    fn migrate_with_fastembed_cache(
222        path: &Path,
223        fastembed_cache_dir: Option<&Path>,
224    ) -> anyhow::Result<schema::SchemaStatus> {
225        let storage = IndexConnection::open(path)?;
226        let status = schema::status(storage.connection())?;
227        match status.state {
228            schema::SchemaState::Newer | schema::SchemaState::Dirty => {
229                anyhow::bail!("{}", status.message);
230            },
231            schema::SchemaState::Compatible => {},
232            schema::SchemaState::Missing | schema::SchemaState::Older => {
233                schema::apply(storage.connection())?;
234            },
235        }
236        ai::ensure_model_manifest(storage.connection())?;
237        if let Some(fastembed_cache_dir) = fastembed_cache_dir {
238            ai::recover_cached_fastembed_model_from(storage.connection(), fastembed_cache_dir)?;
239        } else {
240            ai::recover_cached_fastembed_model(storage.connection())?;
241        }
242        schema::status(storage.connection())
243    }
244
245    pub fn migration_check(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
246        let storage = IndexConnection::open(path)?;
247        schema::status(storage.connection())
248    }
249
250    fn create_or_migrate(path: &Path) -> anyhow::Result<Self> {
251        let mut storage = IndexConnection::open(path)?;
252        schema::apply(storage.connection())?;
253        ai::ensure_model_manifest(storage.connection())?;
254        if let Some(root) = meta_for(storage.connection(), "source_root")? {
255            storage.set_source_root(PathBuf::from(root));
256        }
257        Ok(Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() })
258    }
259
260    pub fn set_context(&mut self, commit_sha: &str, worktree_id: &str) -> anyhow::Result<()> {
261        self.active_commit_sha = commit_sha.to_string();
262        self.active_worktree_id = worktree_id.to_string();
263
264        let conn = self.storage.connection();
265        conn.execute_batch(
266            "
267            CREATE TEMP TABLE IF NOT EXISTS connection_context(key TEXT PRIMARY KEY, value TEXT);
268        ",
269        )?;
270
271        let mut stmt = conn.prepare(
272            "INSERT OR REPLACE INTO temp.connection_context(key, value) VALUES (?1, ?2)",
273        )?;
274        stmt.execute(params!["commit_sha", commit_sha])?;
275        stmt.execute(params!["worktree_id", worktree_id])?;
276
277        conn.execute_batch("
278            DROP VIEW IF EXISTS temp.files;
279            CREATE TEMP VIEW temp.files AS
280            SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
281            FROM main.files
282            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id') AND worktree_id != '' AND kind != 'deleted'
283            UNION ALL
284            SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
285            FROM main.files
286            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
287              AND commit_sha != ''
288              AND path NOT IN (
289                  SELECT path FROM main.files 
290                  WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
291                    AND worktree_id != ''
292              );
293        ")?;
294
295        Ok(())
296    }
297
298    pub fn rebuild(config: &Config) -> anyhow::Result<Self> {
299        Self::rebuild_with_progress(config, |_| {})
300    }
301
302    pub fn rebuild_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
303    where
304        F: FnMut(IndexProgress),
305    {
306        progress(IndexProgress::Started {
307            database: config.database.clone(),
308            mode: IndexMode::Full,
309        });
310        let mut db = Self::create_or_migrate(&config.database)?;
311        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
312        db.set_context(&commit_sha, &worktree_id)?;
313        progress(IndexProgress::IndexingGitHistory);
314        let mut git_history = Some(spawn_git_history_prepare(&config.root));
315        let result = (|| -> anyhow::Result<()> {
316            db.storage.execute_batch("BEGIN TRANSACTION")?;
317            db.clear_full_rebuild_tables()?;
318            db.set_meta("source_root", &config.root.display().to_string())?;
319            db.storage.set_source_root(config.root.clone());
320            db.write_git_meta(&config.root)?;
321            let indexed = db.index_targets_with_progress(config, &mut progress)?;
322            db.apply_prepared_git_history(
323                &config.root,
324                git_history
325                    .take()
326                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
327            )?;
328            progress(IndexProgress::RebuildingLogicalSymbols);
329            db.rebuild_logical_symbols()?;
330            progress(IndexProgress::ResolvingGraph);
331            db.resolve_edges()?;
332            db.mark_graph_index_current()?;
333            progress(IndexProgress::RebuildingFts);
334            db.rebuild_fts()?;
335            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
336            db.storage.execute_batch("COMMIT")?;
337            progress(IndexProgress::Finished { files: indexed });
338            Ok(())
339        })();
340        if result.is_err() {
341            if let Some(handle) = git_history.take() {
342                let _ = join_git_history_prepare(handle);
343            }
344            let _ = db.storage.execute_batch("ROLLBACK");
345        }
346        result?;
347        Ok(db)
348    }
349
    /// Deletes every indexed row that belongs to the current commit/worktree scope.
    ///
    /// Runs as a single SQL batch, in this order:
    ///
    /// 1. Collect into the temp table `full_rebuild_file_ids` the ids of all `main.files`
    ///    rows for the active (non-empty) worktree id, plus the rows for the active
    ///    (non-empty) commit SHA whose path has no row for the active worktree.
    /// 2. Mark edges that point *to* a symbol of those files as `'unresolved'`, clearing the
    ///    target symbol and line range.
    /// 3. Delete edges that originate from those files or from their symbols.
    /// 4. Delete logical-symbol memberships of those symbols, then every logical symbol that
    ///    is left without members.
    /// 5. Delete symbol facts, FTS rows, chunk summaries, chunk embeddings, chunk blame rows,
    ///    docs and parser failures tied to those files.
    /// 6. Delete the symbols, chunks and finally the file rows themselves.
    /// 7. Empty the temp table again.
    ///
    /// The scope is read from `temp.connection_context`, so the batch relies on that table
    /// already existing on this connection.
    ///
    /// # Errors
    /// Propagates any failure from executing the batch.
    fn clear_full_rebuild_tables(&self) -> anyhow::Result<()> {
        self.storage.execute_batch(
            "
            CREATE TEMP TABLE IF NOT EXISTS full_rebuild_file_ids(id INTEGER PRIMARY KEY);
            DELETE FROM temp.full_rebuild_file_ids;
            INSERT OR IGNORE INTO temp.full_rebuild_file_ids(id)
            SELECT id
            FROM main.files
            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
              AND worktree_id != '';
            INSERT OR IGNORE INTO temp.full_rebuild_file_ids(id)
            SELECT id
            FROM main.files
            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
              AND commit_sha != ''
              AND path NOT IN (
                  SELECT path FROM main.files
                  WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
                    AND worktree_id != ''
              );

            UPDATE main.edges
            SET to_symbol_id = NULL,
                target_start_line = NULL,
                target_end_line = NULL,
                resolution = 'unresolved'
            WHERE to_symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.edges
            WHERE source_file_id IN (SELECT id FROM temp.full_rebuild_file_ids)
               OR from_symbol_id IN (
                    SELECT symbols.id
                    FROM main.symbols
                    JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
               );

            DELETE FROM main.logical_symbol_members
            WHERE symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.logical_symbols
            WHERE id NOT IN (
                SELECT logical_symbol_id FROM main.logical_symbol_members
            );
            DELETE FROM main.symbol_facts
            WHERE symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.chunk_fts
            WHERE rowid IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.chunk_summaries
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.chunk_embeddings
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.git_chunk_blame
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.docs
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.parser_failures
            WHERE path IN (
                SELECT path
                FROM main.files
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = files.id
            );
            DELETE FROM main.symbols
            WHERE file_id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM main.chunks
            WHERE file_id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM main.files
            WHERE id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM temp.full_rebuild_file_ids;
            ",
        )?;
        Ok(())
    }
452
453    pub fn index_changed(config: &Config) -> anyhow::Result<Self> {
454        Self::index_changed_with_progress(config, |_| {})
455    }
456
457    pub fn index_changed_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
458    where
459        F: FnMut(IndexProgress),
460    {
461        Self::index_incremental_with_progress(config, IndexMode::Changed, &mut progress)
462    }
463
464    pub fn index_discover(config: &Config) -> anyhow::Result<Self> {
465        Self::index_discover_with_progress(config, |_| {})
466    }
467
468    pub fn index_discover_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
469    where
470        F: FnMut(IndexProgress),
471    {
472        Self::index_incremental_with_progress(config, IndexMode::Discover, &mut progress)
473    }
474
475    fn index_incremental_with_progress<F>(
476        config: &Config,
477        mode: IndexMode,
478        progress: &mut F,
479    ) -> anyhow::Result<Self>
480    where
481        F: FnMut(IndexProgress),
482    {
483        if !config.database.exists() {
484            return Self::rebuild_with_progress(config, progress);
485        }
486        if Self::migration_check(&config.database)?.state == schema::SchemaState::Missing {
487            return Self::rebuild_with_progress(config, progress);
488        }
489
490        let mut db = Self::open(&config.database)?;
491        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
492        db.set_context(&commit_sha, &worktree_id)?;
493        if db.indexed_file_count()? == 0 {
494            return Self::rebuild_with_progress(config, progress);
495        }
496        progress(IndexProgress::Started { database: config.database.clone(), mode });
497        progress(IndexProgress::IndexingGitHistory);
498        let mut git_history = Some(spawn_git_history_prepare(&config.root));
499        let result = (|| -> anyhow::Result<()> {
500            db.storage.execute_batch("BEGIN TRANSACTION")?;
501            db.set_meta("source_root", &config.root.display().to_string())?;
502            db.storage.set_source_root(config.root.clone());
503            db.write_git_meta(&config.root)?;
504            let indexed = match mode {
505                IndexMode::Changed => db.index_changed_files_with_progress(config, progress)?,
506                IndexMode::Discover => db.index_discovered_files_with_progress(config, progress)?,
507                IndexMode::Full => unreachable!("full mode is handled by rebuild_with_progress"),
508            };
509            db.apply_prepared_git_history(
510                &config.root,
511                git_history
512                    .take()
513                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
514            )?;
515            if indexed > 0 {
516                progress(IndexProgress::RebuildingLogicalSymbols);
517                db.rebuild_logical_symbols()?;
518                progress(IndexProgress::ResolvingGraph);
519                db.resolve_edges()?;
520                db.mark_graph_index_current()?;
521                progress(IndexProgress::SyncingFts);
522                db.sync_fts()?;
523            }
524            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
525            db.storage.execute_batch("COMMIT")?;
526            progress(IndexProgress::Finished { files: indexed });
527            Ok(())
528        })();
529        if result.is_err() {
530            if let Some(handle) = git_history.take() {
531                let _ = join_git_history_prepare(handle);
532            }
533            let _ = db.storage.execute_batch("ROLLBACK");
534        }
535        result?;
536        Ok(db)
537    }
538
539    pub fn index_targets(&self, config: &Config) -> anyhow::Result<()> {
540        self.index_targets_with_progress(config, &mut |_| {})?;
541        Ok(())
542    }
543
544    fn index_targets_with_progress<F>(
545        &self,
546        config: &Config,
547        progress: &mut F,
548    ) -> anyhow::Result<usize>
549    where
550        F: FnMut(IndexProgress),
551    {
552        progress(IndexProgress::Discovering);
553        let files = collect_index_files(config)?;
554        let changes = git_changed_paths(&config.root).unwrap_or_default();
555        let files = self.assign_file_scopes(files, &changes);
556        progress(IndexProgress::Discovered { files: files.len() });
557
558        let prepared = prepare_files_with_progress(&files, progress)?;
559        for (index, prepared_file) in prepared.iter().enumerate() {
560            let current = index + 1;
561            if should_report_file_progress(current, files.len()) {
562                progress(IndexProgress::IndexingFile {
563                    current,
564                    total: files.len(),
565                    path: prepared_file.file.relative_path.clone(),
566                    language: prepared_file.file.language,
567                    kind: prepared_file.file.kind,
568                });
569            }
570            self.insert_prepared_file(prepared_file)?;
571        }
572
573        Ok(files.len())
574    }
575
576    fn index_changed_files_with_progress<F>(
577        &self,
578        config: &Config,
579        progress: &mut F,
580    ) -> anyhow::Result<usize>
581    where
582        F: FnMut(IndexProgress),
583    {
584        progress(IndexProgress::Discovering);
585        let changes = git_changed_paths(&config.root)?;
586        let files = collect_changed_index_files(config, &changes)?;
587        let files = self.assign_file_scopes(files, &changes);
588        self.apply_incremental_file_plan(files, changes.deleted, progress)
589    }
590
591    fn index_discovered_files_with_progress<F>(
592        &self,
593        config: &Config,
594        progress: &mut F,
595    ) -> anyhow::Result<usize>
596    where
597        F: FnMut(IndexProgress),
598    {
599        progress(IndexProgress::Discovering);
600        let plan = discovery_plan(self.storage.connection(), config)?;
601        let changes = git_changed_paths(&config.root).unwrap_or_default();
602        let files = self.assign_file_scopes(plan.files, &changes);
603        self.apply_incremental_file_plan(files, plan.deleted, progress)
604    }
605
606    fn assign_file_scopes(
607        &self,
608        files: Vec<IndexFile>,
609        changes: &GitChangedPaths,
610    ) -> Vec<IndexFile> {
611        let has_base_commit = !self.active_commit_sha.is_empty();
612        files
613            .into_iter()
614            .map(|mut file| {
615                if !has_base_commit || changes.changed.contains(&file.relative_path) {
616                    file.commit_sha.clear();
617                    file.worktree_id.clone_from(&self.active_worktree_id);
618                } else {
619                    file.commit_sha.clone_from(&self.active_commit_sha);
620                    file.worktree_id.clear();
621                }
622                file
623            })
624            .collect()
625    }
626
627    fn apply_incremental_file_plan<F>(
628        &self,
629        files: Vec<IndexFile>,
630        deleted: BTreeSet<PathBuf>,
631        progress: &mut F,
632    ) -> anyhow::Result<usize>
633    where
634        F: FnMut(IndexProgress),
635    {
636        progress(IndexProgress::Discovered { files: files.len() });
637
638        let deleted_count = deleted.len();
639        for path in deleted {
640            self.mark_file_deleted(&path)?;
641        }
642
643        let prepared = prepare_files_with_progress(&files, progress)?;
644        for (index, prepared_file) in prepared.iter().enumerate() {
645            let current = index + 1;
646            if should_report_file_progress(current, files.len()) {
647                progress(IndexProgress::IndexingFile {
648                    current,
649                    total: files.len(),
650                    path: prepared_file.file.relative_path.clone(),
651                    language: prepared_file.file.language,
652                    kind: prepared_file.file.kind,
653                });
654            }
655            self.remove_file_in_scope(
656                &prepared_file.file.relative_path,
657                &prepared_file.file.commit_sha,
658                &prepared_file.file.worktree_id,
659            )?;
660            self.insert_prepared_file(prepared_file)?;
661        }
662
663        Ok(files.len() + deleted_count)
664    }
665
666    pub fn status(&self, database: &Path) -> anyhow::Result<IndexStatus> {
667        let mut counts = BTreeMap::new();
668        let mut stmt = self
669            .storage
670            .connection()
671            .prepare("SELECT language, COUNT(*) FROM files GROUP BY language ORDER BY language")?;
672        let rows =
673            stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)))?;
674        for row in rows {
675            let (language, count) = row?;
676            counts.insert(language, u64::try_from(count).unwrap_or(0));
677        }
678
679        let content_revision = self.content_revision()?;
680        let fts_source_revision = self.meta("fts_source_revision")?;
681        let fts_dirty = self.fts_dirty()?;
682
683        Ok(IndexStatus {
684            database: database.display().to_string(),
685            exists: database.exists(),
686            schema: schema::status(self.storage.connection())?,
687            git_commit: self.meta("git_commit")?,
688            git_dirty: self.meta("git_dirty")?.map(|value| value == "true"),
689            indexed_at_ms: self.meta("indexed_at_ms")?.and_then(|value| value.parse::<i64>().ok()),
690            content_revision: content_revision.clone(),
691            fts_synced_at_ms: self
692                .meta("fts_synced_at_ms")?
693                .and_then(|value| value.parse::<i64>().ok()),
694            fts_dirty,
695            fts_fresh: !fts_dirty
696                && fts_source_revision.as_deref() == Some(content_revision.as_str()),
697            fts_source_revision,
698            file_count_by_language: counts,
699            parser_failures: self.parser_failure_count()?,
700            parser_failure_paths: self.parser_failure_paths()?,
701            git_history: self.git_history_status()?,
702            github: self.github_status()?,
703            local_ai: self.local_ai_status()?,
704        })
705    }
706
707    pub fn storage_status(&self) -> anyhow::Result<StorageStatus> {
708        self.storage.status()
709    }
710
711    pub fn discovery_status(&self, config: &Config) -> anyhow::Result<DiscoveryStatus> {
712        let plan = discovery_plan(self.storage.connection(), config)?;
713        let unindexed_source_files =
714            plan.unindexed.iter().filter(|file| file.kind == TargetKind::Source).count();
715        let unindexed_sample =
716            plan.unindexed.iter().take(10).map(|file| path_string(&file.relative_path)).collect();
717        let warning = (unindexed_source_files > 0).then(|| {
718            format!(
719                "{unindexed_source_files} unindexed source files detected. Run `rag-rat index --full` or `rag-rat index --discover`."
720            )
721        });
722        Ok(DiscoveryStatus {
723            discovered_files: plan.discovered_files,
724            indexed_files: plan.indexed_files,
725            unindexed_files: plan.unindexed.len(),
726            unindexed_source_files,
727            changed_indexed_files: plan.changed.len(),
728            removed_indexed_files: plan.deleted.len(),
729            unindexed_sample,
730            warning,
731        })
732    }
733
734    pub fn search(
735        &self,
736        query: &str,
737        limit: u32,
738        include_generated: bool,
739    ) -> anyhow::Result<Vec<SearchHit>> {
740        self.search_with_graph_meta(query, limit, include_generated, GraphMetaMode::Compact, 3)
741    }
742
743    pub fn search_explain(
744        &self,
745        query: &str,
746        limit: u32,
747        include_generated: bool,
748    ) -> anyhow::Result<Vec<SearchHit>> {
749        self.search_explain_with_graph_meta(
750            query,
751            limit,
752            include_generated,
753            GraphMetaMode::Compact,
754            3,
755        )
756    }
757
758    pub fn search_with_graph_meta(
759        &self,
760        query: &str,
761        limit: u32,
762        include_generated: bool,
763        graph_mode: GraphMetaMode,
764        graph_limit: u32,
765    ) -> anyhow::Result<Vec<SearchHit>> {
766        self.search_with_graph_meta_options(
767            query,
768            limit,
769            include_generated,
770            graph_mode,
771            graph_limit,
772            SearchOptions::default(),
773        )
774    }
775
776    pub fn search_with_graph_meta_options(
777        &self,
778        query: &str,
779        limit: u32,
780        include_generated: bool,
781        graph_mode: GraphMetaMode,
782        graph_limit: u32,
783        options: SearchOptions,
784    ) -> anyhow::Result<Vec<SearchHit>> {
785        self.ensure_fts_fresh()?;
786        let mut hits =
787            self.search_with_heal(query, limit, include_generated, true, false, options)?;
788        graph_meta::attach_to_search_hits(
789            self.storage.connection(),
790            &mut hits,
791            graph_mode,
792            graph_limit,
793        )?;
794        Ok(hits)
795    }
796
797    pub fn search_explain_with_graph_meta(
798        &self,
799        query: &str,
800        limit: u32,
801        include_generated: bool,
802        graph_mode: GraphMetaMode,
803        graph_limit: u32,
804    ) -> anyhow::Result<Vec<SearchHit>> {
805        self.search_explain_with_graph_meta_options(
806            query,
807            limit,
808            include_generated,
809            graph_mode,
810            graph_limit,
811            SearchOptions::default(),
812        )
813    }
814
815    pub fn search_explain_with_graph_meta_options(
816        &self,
817        query: &str,
818        limit: u32,
819        include_generated: bool,
820        graph_mode: GraphMetaMode,
821        graph_limit: u32,
822        options: SearchOptions,
823    ) -> anyhow::Result<Vec<SearchHit>> {
824        self.ensure_fts_fresh()?;
825        let mut hits =
826            self.search_with_heal(query, limit, include_generated, true, true, options)?;
827        graph_meta::attach_to_search_hits(
828            self.storage.connection(),
829            &mut hits,
830            graph_mode,
831            graph_limit,
832        )?;
833        Ok(hits)
834    }
835
836    pub fn symbols(
837        &self,
838        name: &str,
839        language: Option<Language>,
840        limit: u32,
841    ) -> anyhow::Result<Vec<crate::query::symbol::SymbolHit>> {
842        crate::query::symbol::lookup(self.storage.connection(), name, language, limit)
843    }
844
845    pub fn symbol_candidates(
846        &self,
847        selector: &crate::query::symbol::SymbolSelector,
848    ) -> anyhow::Result<crate::query::symbol::SymbolLookup> {
849        crate::query::symbol::lookup_candidates(self.storage.connection(), selector)
850    }
851
852    pub fn select_symbol(
853        &self,
854        selector: &crate::query::symbol::SymbolSelector,
855    ) -> anyhow::Result<
856        Result<Option<crate::query::symbol::SymbolHit>, crate::query::symbol::SymbolDisambiguation>,
857    > {
858        crate::query::symbol::select_one(self.storage.connection(), selector)
859    }
860
861    pub fn read_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
862        self.read_chunk_with_graph_and_memories(chunk_id, GraphMetaMode::Full, 20, true)
863    }
864
865    pub fn read_chunk_with_graph(
866        &self,
867        chunk_id: i64,
868        graph_mode: GraphMetaMode,
869        graph_limit: u32,
870    ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
871        self.read_chunk_with_graph_and_memories(chunk_id, graph_mode, graph_limit, false)
872    }
873
874    pub fn read_chunk_with_graph_and_memories(
875        &self,
876        chunk_id: i64,
877        graph_mode: GraphMetaMode,
878        graph_limit: u32,
879        include_memories: bool,
880    ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
881        let Some(mut chunk) = self.read_chunk_current(chunk_id)? else {
882            return Ok(None);
883        };
884        graph_meta::attach_to_read_chunk(
885            self.storage.connection(),
886            &mut chunk,
887            graph_mode,
888            graph_limit,
889        )?;
890        if include_memories {
891            chunk.memories =
892                crate::query::memory::memories_for_chunk(self.storage.connection(), chunk_id, 20)?;
893        }
894        Ok(Some(chunk))
895    }
896
897    fn read_chunk_current(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
898        let Some(mut chunk) = crate::query::read_chunk(self.storage.connection(), chunk_id)? else {
899            return Ok(None);
900        };
901        let Some(root) = self.storage.source_root() else {
902            return Ok(Some(chunk));
903        };
904        let source_path = root.join(&chunk.path);
905        let current_text = match fs::read_to_string(&source_path) {
906            Ok(text) => text,
907            Err(_) => {
908                let path = chunk.path.clone();
909                self.mark_file_deleted(Path::new(&path))?;
910                self.sync_fts()?;
911                anyhow::bail!(IndexError::Gone { chunk_id });
912            },
913        };
914        let anchor = self.chunk_anchor(chunk_id)?;
915        let status = anchors::validate(
916            &chunk.text,
917            usize::try_from(chunk.start_line).unwrap_or(1),
918            usize::try_from(chunk.end_line).unwrap_or(1),
919            &anchor,
920            &current_text,
921        );
922        match status {
923            AnchorStatus::Exact => {
924                if let Some(text) = anchors::slice_lines(
925                    &current_text,
926                    usize::try_from(chunk.start_line).unwrap_or(1),
927                    usize::try_from(chunk.end_line).unwrap_or(1),
928                ) {
929                    chunk.text = text;
930                }
931                Ok(Some(chunk))
932            },
933            AnchorStatus::Relocated { start_line, end_line, text } => {
934                chunk.start_line = i64::try_from(start_line)?;
935                chunk.end_line = i64::try_from(end_line)?;
936                chunk.text = text;
937                Ok(Some(chunk))
938            },
939            AnchorStatus::Stale => {
940                self.heal_file(Path::new(&chunk.path))?;
941                self.sync_fts()?;
942                let healed = crate::query::read_chunk(self.storage.connection(), chunk_id)?;
943                match healed {
944                    Some(chunk) => Ok(Some(chunk)),
945                    None => anyhow::bail!(IndexError::StaleChunk { chunk_id, path: chunk.path }),
946                }
947            },
948        }
949    }
950
951    pub fn search_hash_baseline(
952        &self,
953        query: &str,
954        limit: u32,
955        include_generated: bool,
956    ) -> anyhow::Result<Vec<SearchHit>> {
957        self.ensure_fts_fresh()?;
958        crate::search::lexical::search_hash_baseline(
959            self.storage.connection(),
960            query,
961            limit,
962            include_generated,
963        )
964    }
965
966    pub fn docs_for_symbol(&self, symbol: &str, limit: u32) -> anyhow::Result<Vec<SearchHit>> {
967        self.search(symbol, limit, true)
968    }
969
970    pub fn docs_for_selected_symbol(
971        &self,
972        symbol: &crate::query::symbol::SymbolHit,
973        limit: u32,
974    ) -> anyhow::Result<Vec<SearchHit>> {
975        let mut hits = self.local_symbol_context_hits(symbol, limit)?;
976        hits.extend(self.search(&symbol.name, limit.saturating_mul(4).max(limit), true)?);
977        rank_docs_for_symbol(symbol, &mut hits);
978        dedupe_search_hits(&mut hits);
979        hits.truncate(usize::try_from(limit).unwrap_or(usize::MAX));
980        Ok(hits)
981    }
982
983    pub fn commit_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<CommitSearchHit>> {
984        git_history::commit_search(self.storage.connection(), query, limit)
985    }
986
987    pub fn git_history_for_path(
988        &self,
989        path: &str,
990        limit: u32,
991    ) -> anyhow::Result<Vec<PathHistoryItem>> {
992        git_history::history_for_path(self.storage.connection(), path, limit)
993    }
994
995    pub fn git_history_for_symbol(
996        &self,
997        symbol: &str,
998        language: Option<Language>,
999        limit: u32,
1000    ) -> anyhow::Result<Vec<SymbolHistoryItem>> {
1001        let symbols = self.symbols(symbol, language, limit)?;
1002        let per_symbol_limit = limit.max(1);
1003        let mut out = Vec::new();
1004        for symbol_hit in symbols {
1005            for commit in self.git_history_for_path(&symbol_hit.path, per_symbol_limit)? {
1006                out.push(SymbolHistoryItem {
1007                    symbol: symbol_hit.name.clone(),
1008                    qualified_name: symbol_hit.qualified_name.clone(),
1009                    path: symbol_hit.path.clone(),
1010                    start_byte: symbol_hit.start_byte,
1011                    end_byte: symbol_hit.end_byte,
1012                    commit,
1013                    evidence_kind: "historical",
1014                });
1015                if out.len() >= usize::try_from(limit).unwrap_or(usize::MAX) {
1016                    return Ok(out);
1017                }
1018            }
1019        }
1020        Ok(out)
1021    }
1022
1023    pub fn commits_touching_query(
1024        &self,
1025        query: &str,
1026        limit: u32,
1027    ) -> anyhow::Result<Vec<QueryCommitHit>> {
1028        let current_hits = self.search(query, limit, true)?;
1029        git_history::commits_touching_query(self.storage.connection(), query, limit, &current_hits)
1030    }
1031
1032    pub fn git_blame_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<ChunkBlameSummary>> {
1033        let Some(chunk) = self.read_chunk(chunk_id)? else {
1034            return Ok(None);
1035        };
1036        let source_text_hash = git_history::source_text_hash(&chunk.text);
1037        if let Some(cached) =
1038            git_history::cached_blame(self.storage.connection(), chunk_id, &source_text_hash)?
1039        {
1040            return Ok(Some(cached));
1041        }
1042        let Some(root) = self.storage.source_root() else {
1043            return Ok(Some(ChunkBlameSummary {
1044                chunk_id,
1045                path: chunk.path,
1046                start_line: chunk.start_line,
1047                end_line: chunk.end_line,
1048                source_text_hash,
1049                line_count: 0,
1050                dominant_commit: None,
1051                dominant_commit_lines: 0,
1052                newest_commit: None,
1053                newest_commit_time_s: None,
1054                oldest_commit: None,
1055                oldest_commit_time_s: None,
1056                commit_counts: BTreeMap::new(),
1057                evidence_kind: "historical",
1058            }));
1059        };
1060        let blame_lines =
1061            git_history::blame_lines(root, &chunk.path, chunk.start_line, chunk.end_line);
1062        let mut counts = BTreeMap::<String, i64>::new();
1063        let mut newest = None::<(String, i64)>;
1064        let mut oldest = None::<(String, i64)>;
1065        for line in &blame_lines {
1066            *counts.entry(line.commit.clone()).or_default() += 1;
1067            if let Some(time) = line.author_time_s {
1068                if newest.as_ref().is_none_or(|(_, newest_time)| time > *newest_time) {
1069                    newest = Some((line.commit.clone(), time));
1070                }
1071                if oldest.as_ref().is_none_or(|(_, oldest_time)| time < *oldest_time) {
1072                    oldest = Some((line.commit.clone(), time));
1073                }
1074            }
1075        }
1076        let dominant = counts
1077            .iter()
1078            .max_by_key(|(commit, count)| (*count, *commit))
1079            .map(|(commit, count)| (commit.clone(), *count));
1080        let summary = ChunkBlameSummary {
1081            chunk_id,
1082            path: chunk.path,
1083            start_line: chunk.start_line,
1084            end_line: chunk.end_line,
1085            source_text_hash,
1086            line_count: i64::try_from(blame_lines.len()).unwrap_or(i64::MAX),
1087            dominant_commit: dominant.as_ref().map(|(commit, _)| commit.clone()),
1088            dominant_commit_lines: dominant.map(|(_, count)| count).unwrap_or(0),
1089            newest_commit: newest.as_ref().map(|(commit, _)| commit.clone()),
1090            newest_commit_time_s: newest.as_ref().map(|(_, time)| *time),
1091            oldest_commit: oldest.as_ref().map(|(commit, _)| commit.clone()),
1092            oldest_commit_time_s: oldest.as_ref().map(|(_, time)| *time),
1093            commit_counts: counts,
1094            evidence_kind: "historical",
1095        };
1096        git_history::store_blame(self.storage.connection(), &summary)?;
1097        Ok(Some(summary))
1098    }
1099
1100    pub fn github_sync_from_refs(&self, offline: bool) -> anyhow::Result<GitHubSyncReport> {
1101        self.github_sync_from_refs_with_progress(offline, |_| {})
1102    }
1103
1104    pub fn github_sync_from_refs_with_progress(
1105        &self,
1106        offline: bool,
1107        progress: impl FnMut(github::GitHubSyncProgress),
1108    ) -> anyhow::Result<GitHubSyncReport> {
1109        let Some(root) = self.storage.source_root() else {
1110            anyhow::bail!("index has no source_root metadata; rebuild required");
1111        };
1112        if offline {
1113            github::sync_from_refs::<github::GhCliGitHubClient>(
1114                self.storage.connection(),
1115                root,
1116                None,
1117                true,
1118            )
1119        } else {
1120            let client = github::GhCliGitHubClient;
1121            github::sync_from_refs_with_progress(
1122                self.storage.connection(),
1123                root,
1124                Some(&client),
1125                false,
1126                progress,
1127            )
1128        }
1129    }
1130
1131    pub fn github_sync_issue(
1132        &self,
1133        issue_ref: &str,
1134        offline: bool,
1135    ) -> anyhow::Result<GitHubSyncReport> {
1136        if offline {
1137            github::sync_issue::<github::GhCliGitHubClient>(
1138                self.storage.connection(),
1139                issue_ref,
1140                None,
1141                true,
1142            )
1143        } else {
1144            let client = github::GhCliGitHubClient;
1145            github::sync_issue(self.storage.connection(), issue_ref, Some(&client), false)
1146        }
1147    }
1148
1149    pub fn github_issue_search(
1150        &self,
1151        query: &str,
1152        limit: u32,
1153    ) -> anyhow::Result<Vec<GitHubEvidence>> {
1154        github::issue_search(self.storage.connection(), query, limit)
1155    }
1156
1157    pub fn rationale_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<GitHubEvidence>> {
1158        github::rationale_search(self.storage.connection(), query, limit)
1159    }
1160
1161    pub fn github_refs_for_path(
1162        &self,
1163        path: &str,
1164        limit: u32,
1165    ) -> anyhow::Result<Vec<github::GitHubRef>> {
1166        github::refs_for_path(self.storage.connection(), path, limit)
1167    }
1168
1169    pub fn github_sync_status(&self) -> anyhow::Result<GitHubStatus> {
1170        self.github_status()
1171    }
1172
1173    pub fn papertrail_for_chunk(
1174        &self,
1175        chunk_id: i64,
1176        limit: u32,
1177    ) -> anyhow::Result<Option<Papertrail>> {
1178        let Some(chunk) = self.read_chunk(chunk_id)? else {
1179            return Ok(None);
1180        };
1181        Ok(Some(github::papertrail_for_chunk(self.storage.connection(), &chunk, limit)?))
1182    }
1183
1184    pub fn papertrail_for_symbol(
1185        &self,
1186        symbol: &str,
1187        language: Option<Language>,
1188        limit: u32,
1189    ) -> anyhow::Result<Option<Papertrail>> {
1190        let Some(symbol) = self.symbols(symbol, language, limit)?.into_iter().next() else {
1191            return Ok(None);
1192        };
1193        Ok(Some(github::papertrail_for_symbol(self.storage.connection(), &symbol, limit)?))
1194    }
1195
1196    pub fn papertrail_for_selected_symbol(
1197        &self,
1198        symbol: &crate::query::symbol::SymbolHit,
1199        limit: u32,
1200    ) -> anyhow::Result<Papertrail> {
1201        github::papertrail_for_symbol(self.storage.connection(), symbol, limit)
1202    }
1203
1204    pub fn papertrail_for_commit(
1205        &self,
1206        commit_hash: &str,
1207        limit: u32,
1208    ) -> anyhow::Result<Papertrail> {
1209        github::papertrail_for_commit(self.storage.connection(), commit_hash, limit)
1210    }
1211
1212    pub fn local_ai_status(&self) -> anyhow::Result<LocalAiStatus> {
1213        ai::status(self.storage.connection())
1214    }
1215
1216    pub fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
1217        ai::models(self.storage.connection())
1218    }
1219
1220    pub fn install_model(&self, model_id: &str) -> anyhow::Result<ModelInfo> {
1221        ai::install_model(self.storage.connection(), model_id)
1222    }
1223
1224    pub fn reconcile(
1225        &self,
1226        limit: Option<u32>,
1227        batch_size: Option<u32>,
1228    ) -> anyhow::Result<ReconcileReport> {
1229        ai::reconcile(self.storage.connection(), limit, batch_size)
1230    }
1231
1232    pub fn reconcile_plan(&self) -> anyhow::Result<ReconcilePlan> {
1233        ai::reconcile_plan(self.storage.connection())
1234    }
1235
1236    pub fn reconcile_with_progress(
1237        &self,
1238        limit: Option<u32>,
1239        batch_size: Option<u32>,
1240        force: bool,
1241        progress: impl FnMut(ai::ReconcileProgress),
1242    ) -> anyhow::Result<ReconcileReport> {
1243        ai::reconcile_with_progress(self.storage.connection(), limit, batch_size, force, progress)
1244    }
1245
1246    pub fn reconcile_with_options_progress(
1247        &self,
1248        options: ai::ReconcileOptions,
1249        progress: impl FnMut(ai::ReconcileProgress),
1250    ) -> anyhow::Result<ReconcileReport> {
1251        ai::reconcile_with_options_progress(self.storage.connection(), options, progress)
1252    }
1253
1254    pub fn current_embedding_count(&self, model_id: &str) -> anyhow::Result<u64> {
1255        ai::current_embedding_count(self.storage.connection(), model_id)
1256    }
1257
1258    pub fn heal_index(&self, limit: Option<u32>) -> anyhow::Result<HealIndexReport> {
1259        let Some(root) = self.storage.source_root() else {
1260            anyhow::bail!("heal_index requires source_root metadata; run `rag-rat index` first");
1261        };
1262        let indexed_files = self.indexed_files()?;
1263        let max_repairs = limit.map(usize::try_from).transpose()?.unwrap_or(usize::MAX);
1264        let mut report = HealIndexReport {
1265            checked_files: 0,
1266            healed_files: 0,
1267            removed_files: 0,
1268            skipped_files: 0,
1269            fts_fresh: false,
1270            message: None,
1271        };
1272
1273        for file in indexed_files {
1274            report.checked_files += 1;
1275            let path = Path::new(&file.path);
1276            let full_path = root.join(path);
1277            let Ok(text) = fs::read_to_string(&full_path) else {
1278                if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1279                    >= max_repairs
1280                {
1281                    report.message =
1282                        Some("limit reached; rerun heal_index to continue".to_string());
1283                    break;
1284                }
1285                self.mark_file_deleted(path)?;
1286                report.removed_files += 1;
1287                continue;
1288            };
1289            let sha256 = hex_sha256(text.as_bytes());
1290            if sha256 == file.sha256 {
1291                report.skipped_files += 1;
1292                continue;
1293            }
1294            if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1295                >= max_repairs
1296            {
1297                report.message = Some("limit reached; rerun heal_index to continue".to_string());
1298                break;
1299            }
1300            self.heal_file(path)?;
1301            report.healed_files += 1;
1302        }
1303
1304        if report.healed_files > 0 || report.removed_files > 0 {
1305            self.sync_fts()?;
1306        } else {
1307            self.ensure_fts_fresh()?;
1308        }
1309        report.fts_fresh = !self.fts_dirty()?;
1310        Ok(report)
1311    }
1312
1313    pub fn ffi_surface(&self, limit: u32) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1314        crate::query::impact::ffi_surface(self.storage.connection(), limit)
1315    }
1316
1317    pub fn find_callers(
1318        &self,
1319        symbol: &str,
1320        limit: u32,
1321    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1322        crate::query::graph::traverse(self.storage.connection(), symbol, true, limit)
1323    }
1324
1325    pub fn find_callers_with_options(
1326        &self,
1327        symbol: &str,
1328        limit: u32,
1329        options: &crate::query::graph::GraphTraversalOptions,
1330    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1331        let options = self.graph_options_with_logical_group(options)?;
1332        crate::query::graph::traverse_with_options(
1333            self.storage.connection(),
1334            symbol,
1335            true,
1336            limit,
1337            &options,
1338        )
1339    }
1340
1341    pub fn trace_callees(
1342        &self,
1343        symbol: &str,
1344        limit: u32,
1345    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1346        crate::query::graph::traverse(self.storage.connection(), symbol, false, limit)
1347    }
1348
1349    pub fn trace_callees_with_options(
1350        &self,
1351        symbol: &str,
1352        limit: u32,
1353        options: &crate::query::graph::GraphTraversalOptions,
1354    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1355        let options = self.graph_options_with_logical_group(options)?;
1356        crate::query::graph::traverse_with_options(
1357            self.storage.connection(),
1358            symbol,
1359            false,
1360            limit,
1361            &options,
1362        )
1363    }
1364
1365    pub fn graph_traversal_report(
1366        &self,
1367        tool: &str,
1368        symbol: &crate::query::symbol::SymbolHit,
1369        reverse: bool,
1370        limit: u32,
1371        options: &crate::query::graph::GraphTraversalOptions,
1372    ) -> anyhow::Result<crate::query::graph::GraphTraversalReport> {
1373        let options = self.graph_options_with_logical_group(options)?;
1374        let results = crate::query::graph::traverse_with_options(
1375            self.storage.connection(),
1376            &symbol.qualified_name,
1377            reverse,
1378            limit,
1379            &options,
1380        )?;
1381        let summary = crate::query::graph::traversal_summary(
1382            self.storage.connection(),
1383            &symbol.qualified_name,
1384            reverse,
1385            limit,
1386            &options,
1387            results.len(),
1388        )?;
1389        let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1390        let mut paths = BTreeSet::new();
1391        paths.insert(symbol.path.clone());
1392        for result in &results {
1393            if let Some(callsite) = &result.callsite {
1394                paths.insert(callsite.path.clone());
1395            }
1396        }
1397        let mut coverage = self.graph_coverage(paths)?;
1398        if summary.unresolved > 0 {
1399            coverage.known_index_gaps.push(format!(
1400                "{} unresolved qualified callsites match the requested final segment but are not verified to this symbol",
1401                summary.unresolved
1402            ));
1403        }
1404        Ok(crate::query::graph::GraphTraversalReport {
1405            query: crate::query::graph::GraphTraversalQuery {
1406                tool: tool.to_string(),
1407                symbol_id: Some(symbol.symbol_id),
1408                logical_symbol_id: options.logical_symbol_id,
1409                symbol_path: symbol.qualified_name.clone(),
1410                resolution: options.resolution_mode.as_str().to_string(),
1411            },
1412            logical_symbol,
1413            variants,
1414            summary,
1415            coverage,
1416            results,
1417        })
1418    }
1419
1420    pub fn compare_graph_to_text(
1421        &self,
1422        symbol: &crate::query::symbol::SymbolHit,
1423        pattern: &str,
1424        limit: u32,
1425        options: &crate::query::graph::GraphTraversalOptions,
1426        include_tests: bool,
1427    ) -> anyhow::Result<crate::query::graph::CompareGraphTextReport> {
1428        let regex = Regex::new(pattern)?;
1429        let options = self.graph_options_with_logical_group(options)?;
1430        let mut graph_edges = crate::query::graph::traverse_with_options(
1431            self.storage.connection(),
1432            &symbol.qualified_name,
1433            true,
1434            limit,
1435            &options,
1436        )?;
1437        if !include_tests {
1438            graph_edges.retain(|edge| {
1439                edge.callsite.as_ref().is_none_or(|callsite| !is_test_like_path(&callsite.path))
1440            });
1441        }
1442        let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1443        let text_hits = self.regex_hits(pattern, &regex, include_tests)?;
1444        let text_by_location = text_hits
1445            .iter()
1446            .map(|hit| ((hit.path.clone(), hit.line), hit))
1447            .collect::<BTreeMap<_, _>>();
1448        let graph_by_location = graph_edges
1449            .iter()
1450            .filter_map(|edge| {
1451                edge.callsite
1452                    .as_ref()
1453                    .map(|callsite| ((callsite.path.clone(), callsite.line), edge))
1454            })
1455            .collect::<BTreeMap<_, _>>();
1456
1457        let mut paths = BTreeSet::new();
1458        paths.insert(symbol.path.clone());
1459        for hit in &text_hits {
1460            paths.insert(hit.path.clone());
1461        }
1462        for edge in &graph_edges {
1463            if let Some(callsite) = &edge.callsite {
1464                paths.insert(callsite.path.clone());
1465            }
1466        }
1467
1468        let parser_failure_paths = self
1469            .parser_failure_paths()?
1470            .into_iter()
1471            .map(|failure| failure.path)
1472            .collect::<BTreeSet<_>>();
1473        let mut matched_hits = Vec::new();
1474        let mut text_only_hits = Vec::new();
1475        let mut likely_parser_gaps = Vec::new();
1476        for hit in &text_hits {
1477            if let Some(edge) = graph_by_location.get(&(hit.path.clone(), hit.line)) {
1478                matched_hits.push(crate::query::graph::MatchedGraphTextHit {
1479                    path: hit.path.clone(),
1480                    line: hit.line,
1481                    text: hit.text.clone(),
1482                    target: edge.target.clone(),
1483                    edge_kind: edge.edge_kind.clone(),
1484                    confidence: edge.confidence.clone(),
1485                    resolution: edge.resolution.clone(),
1486                });
1487            } else {
1488                let gap_kind = classify_text_only_hit(&hit.path, &hit.text, &parser_failure_paths);
1489                let text_only_hit = crate::query::graph::TextOnlyHit {
1490                    path: hit.path.clone(),
1491                    line: hit.line,
1492                    text: hit.text.clone(),
1493                    reason: if gap_kind == "parser_call_extraction" || gap_kind == "parser_failure"
1494                    {
1495                        "no graph edge extracted"
1496                    } else {
1497                        "text mention outside graph-call evidence"
1498                    }
1499                    .to_string(),
1500                    likely_gap: gap_kind.to_string(),
1501                };
1502                if is_likely_parser_gap_kind(gap_kind) {
1503                    likely_parser_gaps.push(text_only_hit.clone());
1504                }
1505                text_only_hits.push(text_only_hit);
1506            }
1507        }
1508
1509        let mut graph_only_edges = Vec::new();
1510        let mut likely_false_positives = Vec::new();
1511        for edge in &graph_edges {
1512            let Some(callsite) = &edge.callsite else {
1513                continue;
1514            };
1515            if text_by_location.contains_key(&(callsite.path.clone(), callsite.line)) {
1516                continue;
1517            }
1518            let current_line = self.current_line_text(&callsite.path, callsite.line)?;
1519            let graph_only = crate::query::graph::GraphOnlyEdge {
1520                path: callsite.path.clone(),
1521                line: callsite.line,
1522                target: edge.target.clone(),
1523                edge_kind: edge.edge_kind.clone(),
1524                confidence: edge.confidence.clone(),
1525                resolution: edge.resolution.clone(),
1526                evidence: edge.evidence.clone(),
1527                reason: "graph edge exists but pattern did not match text".to_string(),
1528                likely_reason: graph_only_reason(edge, current_line.as_deref()),
1529            };
1530            if is_likely_false_positive_graph_only(edge, &graph_only) {
1531                likely_false_positives.push(graph_only.clone());
1532            }
1533            graph_only_edges.push(graph_only);
1534        }
1535        let complete = likely_parser_gaps.is_empty() && likely_false_positives.is_empty();
1536        let recommended_fallback =
1537            recommended_graph_text_fallback(&likely_parser_gaps, &graph_only_edges);
1538        let pattern_match_mode = compare_pattern_match_mode(pattern, &symbol.name);
1539        let mut warnings = Vec::new();
1540        if pattern_match_mode == "substring_identifier" {
1541            warnings.push(format!(
1542                "pattern may match identifiers that merely contain `{}`; use an identifier boundary or escaped call suffix for exact text auditing",
1543                symbol.name
1544            ));
1545        }
1546
1547        Ok(crate::query::graph::CompareGraphTextReport {
1548            query: crate::query::graph::CompareGraphTextQuery {
1549                symbol_id: Some(symbol.symbol_id),
1550                logical_symbol_id: options.logical_symbol_id,
1551                symbol_path: symbol.qualified_name.clone(),
1552                pattern: pattern.to_string(),
1553                resolution: options.resolution_mode.as_str().to_string(),
1554                include_tests,
1555            },
1556            logical_symbol,
1557            variants,
1558            summary: crate::query::graph::CompareGraphTextSummary {
1559                graph_hits: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1560                graph_edges: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1561                text_hits: u64::try_from(text_hits.len()).unwrap_or(u64::MAX),
1562                matched: u64::try_from(matched_hits.len()).unwrap_or(u64::MAX),
1563                graph_only: u64::try_from(graph_only_edges.len()).unwrap_or(u64::MAX),
1564                text_only: u64::try_from(text_only_hits.len()).unwrap_or(u64::MAX),
1565                text_mentions: u64::try_from(text_only_hits.len() - likely_parser_gaps.len())
1566                    .unwrap_or(u64::MAX),
1567                likely_parser_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1568                likely_false_positives: u64::try_from(likely_false_positives.len())
1569                    .unwrap_or(u64::MAX),
1570                likely_index_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1571                complete,
1572                recommended_fallback,
1573                pattern_match_mode,
1574                warnings,
1575            },
1576            coverage: self.graph_coverage(paths)?,
1577            matched_hits,
1578            text_only_hits,
1579            graph_only_edges,
1580            likely_parser_gaps,
1581            likely_false_positives,
1582        })
1583    }
1584
1585    fn graph_logical_symbol(
1586        &self,
1587        logical_symbol_id: Option<i64>,
1588    ) -> anyhow::Result<(
1589        Option<crate::query::graph::LogicalSymbol>,
1590        Vec<crate::query::graph::LogicalSymbolVariant>,
1591    )> {
1592        let Some(logical_symbol_id) = logical_symbol_id else {
1593            return Ok((None, Vec::new()));
1594        };
1595        let Some(logical) = crate::query::symbol::lookup_logical_by_id(
1596            self.storage.connection(),
1597            logical_symbol_id,
1598        )?
1599        else {
1600            return Ok((None, Vec::new()));
1601        };
1602        let variants = crate::query::symbol::logical_members(
1603            self.storage.connection(),
1604            logical.logical_symbol_id,
1605        )?
1606        .into_iter()
1607        .map(|member| crate::query::graph::LogicalSymbolVariant {
1608            symbol_id: member.symbol_id,
1609            cfg_expr: member.cfg_expr,
1610            signature_hash: member.signature_hash,
1611            start_line: member.start_line,
1612            end_line: member.end_line,
1613        })
1614        .collect::<Vec<_>>();
1615        Ok((
1616            Some(crate::query::graph::LogicalSymbol {
1617                logical_symbol_id: logical.logical_symbol_id,
1618                qualified_name: logical.qualified_name,
1619                variant_count: logical.variant_count,
1620                group_reason: logical.group_reason,
1621            }),
1622            variants,
1623        ))
1624    }
1625
1626    fn graph_options_with_logical_group(
1627        &self,
1628        options: &crate::query::graph::GraphTraversalOptions,
1629    ) -> anyhow::Result<crate::query::graph::GraphTraversalOptions> {
1630        if options.logical_symbol_id.is_some() {
1631            return Ok(options.clone());
1632        }
1633        let Some(symbol_id) = options.symbol_id else {
1634            return Ok(options.clone());
1635        };
1636        let Some(logical) =
1637            crate::query::symbol::logical_for_symbol_id(self.storage.connection(), symbol_id)?
1638        else {
1639            return Ok(options.clone());
1640        };
1641        let mut options = options.clone();
1642        options.logical_symbol_id = Some(logical.logical_symbol_id);
1643        Ok(options)
1644    }
1645
1646    fn local_symbol_context_hits(
1647        &self,
1648        symbol: &crate::query::symbol::SymbolHit,
1649        limit: u32,
1650    ) -> anyhow::Result<Vec<SearchHit>> {
1651        let mut stmt = self.storage.connection().prepare(
1652            "
1653            SELECT chunks.id, files.path, files.language, files.kind,
1654                   chunks.start_line, chunks.end_line, chunks.symbol_path, chunks.text
1655            FROM chunks
1656            JOIN files ON files.id = chunks.file_id
1657            WHERE files.path = ?1
1658              AND (
1659                chunks.symbol_path = ?2
1660                OR chunks.symbol_path LIKE ?3
1661                OR chunks.text LIKE ?4
1662              )
1663            ORDER BY
1664              CASE
1665                WHEN chunks.symbol_path = ?2 THEN 0
1666                WHEN chunks.symbol_path LIKE ?3 THEN 1
1667                ELSE 2
1668              END,
1669              chunks.start_line
1670            LIMIT ?5
1671            ",
1672        )?;
1673        let rows = stmt.query_map(
1674            params![
1675                symbol.path,
1676                symbol.qualified_name,
1677                format!("%{}%", symbol.name),
1678                format!("%{}%", symbol.name),
1679                i64::from(limit.max(1)),
1680            ],
1681            |row| {
1682                let text: String = row.get(7)?;
1683                Ok(SearchHit {
1684                    chunk_id: row.get(0)?,
1685                    path: row.get(1)?,
1686                    language: row.get(2)?,
1687                    kind: row.get(3)?,
1688                    start_line: row.get(4)?,
1689                    end_line: row.get(5)?,
1690                    symbol_path: row.get(6)?,
1691                    score: 1.0,
1692                    summary: bounded_summary(&text),
1693                    graph: None,
1694                    score_components: None,
1695                })
1696            },
1697        )?;
1698        let mut hits = Vec::new();
1699        for row in rows {
1700            hits.push(row?);
1701        }
1702        Ok(hits)
1703    }
1704
1705    pub fn impact_surface(
1706        &self,
1707        query: &str,
1708        limit: u32,
1709    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1710        crate::query::impact::impact_surface(self.storage.connection(), query, limit)
1711    }
1712
1713    pub fn impact_surface_with_options(
1714        &self,
1715        query: &str,
1716        limit: u32,
1717        resolution_mode: crate::query::graph::GraphResolutionMode,
1718    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1719        crate::query::impact::impact_surface_with_options(
1720            self.storage.connection(),
1721            query,
1722            limit,
1723            resolution_mode,
1724        )
1725    }
1726
1727    pub fn impact_surface_for_selected_symbol(
1728        &self,
1729        symbol: &crate::query::symbol::SymbolHit,
1730        limit: u32,
1731        resolution_mode: crate::query::graph::GraphResolutionMode,
1732    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1733        crate::query::impact::impact_surface_for_symbol(
1734            self.storage.connection(),
1735            symbol,
1736            limit,
1737            resolution_mode,
1738        )
1739    }
1740
1741    pub fn impact_surface_report_for_selected_symbol(
1742        &self,
1743        symbol: &crate::query::symbol::SymbolHit,
1744        limit: u32,
1745        options: &crate::query::impact::ImpactSurfaceOptions,
1746    ) -> anyhow::Result<crate::query::impact::ImpactSurfaceReport> {
1747        crate::query::impact::impact_surface_report_for_symbol(
1748            self.storage.connection(),
1749            symbol,
1750            limit,
1751            options,
1752        )
1753    }
1754
1755    pub fn repo_brief(
1756        &self,
1757        options: crate::query::repo_brief::RepoBriefOptions,
1758    ) -> anyhow::Result<crate::query::repo_brief::RepoBrief> {
1759        crate::query::repo_brief::repo_brief(self.storage.connection(), options)
1760    }
1761
1762    pub fn repo_clusters(
1763        &self,
1764        options: crate::query::clusters::RepoClustersOptions,
1765    ) -> anyhow::Result<crate::query::clusters::RepoClustersReport> {
1766        crate::query::clusters::repo_clusters(self.storage.connection(), options)
1767    }
1768
1769    pub fn memory_create(
1770        &self,
1771        request: crate::query::memory::RepoMemoryCreate,
1772    ) -> anyhow::Result<crate::query::memory::RepoMemoryCreateResult> {
1773        crate::query::memory::create_memory(self.storage.connection(), request)
1774    }
1775
1776    pub fn memory_update(
1777        &self,
1778        update: crate::query::memory::RepoMemoryUpdate,
1779    ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1780        crate::query::memory::update_memory(self.storage.connection(), update)
1781    }
1782
1783    pub fn memory_mark_obsolete(
1784        &self,
1785        memory_id: &str,
1786    ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1787        crate::query::memory::mark_obsolete(self.storage.connection(), memory_id)
1788    }
1789
1790    pub fn memory_search(
1791        &self,
1792        query: &str,
1793        limit: u32,
1794    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1795        crate::query::memory::memory_search(self.storage.connection(), query, limit)
1796    }
1797
1798    pub fn memory_for_symbol(
1799        &self,
1800        symbol: &crate::query::symbol::SymbolHit,
1801        limit: u32,
1802    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1803        crate::query::memory::memories_for_symbol(self.storage.connection(), symbol, limit)
1804    }
1805
1806    pub fn memory_for_path(
1807        &self,
1808        path: &str,
1809        limit: u32,
1810    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1811        crate::query::memory::memories_for_path(self.storage.connection(), path, limit)
1812    }
1813
1814    pub fn memory_for_edges(
1815        &self,
1816        edge_ids: &[i64],
1817        limit: u32,
1818    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1819        crate::query::memory::memories_for_edges(self.storage.connection(), edge_ids, limit)
1820    }
1821
1822    pub fn memory_evidence_for_symbol_and_edges(
1823        &self,
1824        symbol: &crate::query::symbol::SymbolHit,
1825        edge_ids: &[i64],
1826        limit: u32,
1827    ) -> anyhow::Result<crate::query::memory::RepoMemoryEvidence> {
1828        crate::query::memory::memory_evidence_for_symbol_and_edges(
1829            self.storage.connection(),
1830            symbol,
1831            edge_ids,
1832            limit,
1833        )
1834    }
1835
1836    pub fn memory_for_call_path_hash(
1837        &self,
1838        edge_sequence_hash: &str,
1839        limit: u32,
1840    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1841        crate::query::memory::memories_for_call_path_hash(
1842            self.storage.connection(),
1843            edge_sequence_hash,
1844            limit,
1845        )
1846    }
1847
1848    pub fn memory_validate(
1849        &self,
1850    ) -> anyhow::Result<crate::query::memory::RepoMemoryValidationReport> {
1851        crate::query::memory::validate_memories(self.storage.connection())
1852    }
1853
1854    pub fn rebuild_fts(&self) -> anyhow::Result<()> {
1855        schema::rebuild_fts(self.storage.connection())?;
1856        self.record_content_revision()?;
1857        self.record_fts_current()?;
1858        self.set_meta("fts_dirty", "false")?;
1859        Ok(())
1860    }
1861
1862    pub fn sync_fts(&self) -> anyhow::Result<()> {
1863        self.record_content_revision()?;
1864        self.record_fts_current()?;
1865        self.set_meta("fts_dirty", "false")?;
1866        Ok(())
1867    }
1868
1869    fn record_fts_current(&self) -> anyhow::Result<()> {
1870        self.set_meta("fts_synced_at_ms", &now_ms().to_string())?;
1871        let revision = self.content_revision()?;
1872        self.set_meta("fts_source_revision", &revision)?;
1873        Ok(())
1874    }
1875
1876    fn record_content_revision(&self) -> anyhow::Result<String> {
1877        let revision = self.content_revision()?;
1878        self.set_meta("content_revision", &revision)?;
1879        Ok(revision)
1880    }
1881
1882    pub fn heal_file(&self, path: &Path) -> anyhow::Result<()> {
1883        let Some(root) = self.storage.source_root() else {
1884            anyhow::bail!("index has no source_root metadata; rebuild required");
1885        };
1886        let row = self.file_row(path)?;
1887        let full_path = root.join(path);
1888        let text = fs::read_to_string(&full_path)?;
1889
1890        let changes = git_changed_paths(root).unwrap_or_default();
1891        let is_dirty = changes.changed.contains(path);
1892        let has_base_commit = !self.active_commit_sha.is_empty();
1893        let scope = if !has_base_commit || is_dirty {
1894            FileScope::worktree(self.active_worktree_id.clone())
1895        } else {
1896            FileScope::commit(self.active_commit_sha.clone())
1897        };
1898        self.remove_file_in_scope(path, &scope.commit_sha, &scope.worktree_id)?;
1899
1900        self.index_file(
1901            path,
1902            row.language,
1903            row.kind,
1904            file_metadata_ms(&full_path)?,
1905            &text,
1906            &scope,
1907        )?;
1908        self.rebuild_logical_symbols()?;
1909        self.resolve_edges()
1910    }
1911
1912    fn index_file(
1913        &self,
1914        path: &Path,
1915        language: Language,
1916        kind: TargetKind,
1917        modified_at_ms: i64,
1918        text: &str,
1919        scope: &FileScope,
1920    ) -> anyhow::Result<()> {
1921        if language != Language::Markdown && kind != TargetKind::Generated {
1922            if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
1923                // Large source files are intentionally coarse-indexed to keep full-repo indexing
1924                // responsive. This is not a parser failure.
1925            } else if let Some(message) = parser::parse_error(path, language, text)
1926                .unwrap_or_else(|err| Some(err.to_string()))
1927            {
1928                self.insert_parser_failure(path, language, &message)?;
1929            }
1930        }
1931        let sha256 = hex_sha256(text.as_bytes());
1932        let file_id = self.storage.connection().query_row(
1933            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
1934             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
1935             RETURNING id",
1936            params![
1937                path_string(path),
1938                language.as_str(),
1939                kind.as_str(),
1940                sha256,
1941                modified_at_ms,
1942                matches!(kind, TargetKind::Generated),
1943                now_ms(),
1944                sha256,
1945                &scope.commit_sha,
1946                &scope.worktree_id,
1947            ],
1948            |row| row.get::<_, i64>(0),
1949        )?;
1950        let chunks = if kind == TargetKind::Generated {
1951            chunker::generated_chunks_for_file(path, text)
1952        } else {
1953            chunker::chunks_for_file(path, language, text)
1954        };
1955        let symbols =
1956            if kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
1957                Vec::new()
1958            } else {
1959                symbols::symbols_for_file(path, language, text)
1960            };
1961        self.insert_chunks(file_id, &sha256, &chunks, text)?;
1962        self.insert_symbols(file_id, language, &symbols)?;
1963        if kind != TargetKind::Generated && text.len() <= edges::MAX_GRAPH_PARSE_BYTES {
1964            edges::index_file_edges(self.storage.connection(), file_id, path, language, text)?;
1965        }
1966        self.mark_fts_dirty()?;
1967        Ok(())
1968    }
1969
1970    fn insert_prepared_file(&self, prepared_file: &PreparedIndexFile) -> anyhow::Result<()> {
1971        let file = &prepared_file.file;
1972        let prepared = match &prepared_file.prepared {
1973            Ok(prepared) => prepared,
1974            Err(err) => {
1975                self.insert_parser_failure(&file.relative_path, file.language, &err.to_string())?;
1976                return Ok(());
1977            },
1978        };
1979        if let Some(message) = &prepared.parser_failure {
1980            self.insert_parser_failure(&file.relative_path, file.language, message)?;
1981        }
1982        let file_id = self.storage.connection().query_row(
1983            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
1984             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
1985             RETURNING id",
1986            params![
1987                path_string(&file.relative_path),
1988                file.language.as_str(),
1989                file.kind.as_str(),
1990                prepared.sha256,
1991                prepared.modified_at_ms,
1992                matches!(file.kind, TargetKind::Generated),
1993                now_ms(),
1994                prepared.sha256,
1995                file.commit_sha,
1996                file.worktree_id,
1997            ],
1998            |row| row.get::<_, i64>(0),
1999        )?;
2000        self.insert_chunks(file_id, &prepared.sha256, &prepared.chunks, &prepared.text)?;
2001        self.insert_symbols(file_id, file.language, &prepared.symbols)?;
2002        if file.kind != TargetKind::Generated && prepared.text.len() <= edges::MAX_GRAPH_PARSE_BYTES
2003        {
2004            edges::index_file_edges(
2005                self.storage.connection(),
2006                file_id,
2007                &file.relative_path,
2008                file.language,
2009                &prepared.text,
2010            )?;
2011        }
2012        self.mark_fts_dirty()?;
2013        Ok(())
2014    }
2015
2016    fn insert_chunks(
2017        &self,
2018        file_id: i64,
2019        source_revision: &str,
2020        chunks: &[Chunk],
2021        full_text: &str,
2022    ) -> anyhow::Result<()> {
2023        let (path, language, kind) = self.storage.connection().query_row(
2024            "SELECT path, language, kind FROM main.files WHERE id = ?1",
2025            [file_id],
2026            |row| {
2027                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?))
2028            },
2029        )?;
2030        for chunk in chunks {
2031            let anchor =
2032                anchors::anchor_for_text(&chunk.text, chunk.start_line, chunk.end_line, full_text);
2033            let embedding_policy = ai::embedding_policy_for_chunk(
2034                Path::new(&path),
2035                &language,
2036                &kind,
2037                chunk.kind,
2038                chunk.symbol_path.as_deref(),
2039                &chunk.text,
2040                ai::DEFAULT_MAX_EMBEDDING_CHARS,
2041            );
2042            self.storage.connection().execute(
2043                "INSERT INTO chunks(file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line, text, text_hash,
2044                                    source_revision, anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2045                                    start_context_hash, end_context_hash, context_radius, embedding_policy, embedding_priority)
2046                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19)",
2047                params![
2048                    file_id,
2049                    chunk.kind,
2050                    chunk.symbol_path,
2051                    i64::try_from(chunk.start_byte)?,
2052                    i64::try_from(chunk.end_byte)?,
2053                    i64::try_from(chunk.start_line)?,
2054                    i64::try_from(chunk.end_line)?,
2055                    chunk.text,
2056                    hex_sha256(chunk.text.as_bytes()),
2057                    source_revision,
2058                    anchor.version,
2059                    anchor.normalized_hash,
2060                    anchor.start_boundary_hash,
2061                    anchor.end_boundary_hash,
2062                    anchor.start_context_hash,
2063                    anchor.end_context_hash,
2064                    anchor.context_radius,
2065                    embedding_policy.policy,
2066                    embedding_policy.priority,
2067                ],
2068            )?;
2069            let chunk_id = self.storage.connection().last_insert_rowid();
2070            self.storage.connection().execute(
2071                "INSERT INTO chunk_fts(rowid, text) VALUES (?1, ?2)",
2072                params![chunk_id, chunk.text],
2073            )?;
2074        }
2075        Ok(())
2076    }
2077
2078    fn insert_symbols(
2079        &self,
2080        file_id: i64,
2081        language: Language,
2082        symbols: &[Symbol],
2083    ) -> anyhow::Result<()> {
2084        for symbol in symbols {
2085            self.storage.connection().execute(
2086                "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs)
2087                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
2088                params![
2089                    file_id,
2090                    language.as_str(),
2091                    symbol.name,
2092                    symbol.qualified_name,
2093                    symbol.kind,
2094                    i64::try_from(symbol.start_byte)?,
2095                    i64::try_from(symbol.end_byte)?,
2096                    symbol.signature,
2097                    symbol.docs,
2098                ],
2099            )?;
2100            let symbol_id = self.storage.connection().last_insert_rowid();
2101            for fact in &symbol.facts {
2102                self.storage.connection().execute(
2103                    "INSERT OR IGNORE INTO symbol_facts(symbol_id, fact_kind, fact_value)
2104                     VALUES (?1, ?2, ?3)",
2105                    params![symbol_id, fact.kind, fact.value],
2106                )?;
2107            }
2108        }
2109        Ok(())
2110    }
2111
2112    fn write_git_meta(&self, root: &Path) -> anyhow::Result<()> {
2113        self.set_meta("git_commit", &git_output(root, &["rev-parse", "HEAD"]).unwrap_or_default())?;
2114        let dirty = !git_output(root, &["status", "--porcelain"]).unwrap_or_default().is_empty();
2115        self.set_meta("git_dirty", if dirty { "true" } else { "false" })?;
2116        Ok(())
2117    }
2118
2119    fn apply_prepared_git_history(
2120        &self,
2121        root: &Path,
2122        handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
2123    ) -> anyhow::Result<GitHistoryIndexStatus> {
2124        let prepared = join_git_history_prepare(handle)?;
2125        git_history::apply_prepared(self.storage.connection(), root, prepared)
2126    }
2127
2128    fn git_history_status(&self) -> anyhow::Result<GitHistoryIndexStatus> {
2129        let Some(root) = self.storage.source_root() else {
2130            return git_history::status(self.storage.connection(), Path::new("."));
2131        };
2132        git_history::status(self.storage.connection(), root)
2133    }
2134
2135    fn github_status(&self) -> anyhow::Result<GitHubStatus> {
2136        github::status(self.storage.connection())
2137    }
2138
2139    fn mark_fts_dirty(&self) -> anyhow::Result<()> {
2140        self.set_meta("fts_dirty", "true")
2141    }
2142
2143    fn resolve_edges(&self) -> anyhow::Result<()> {
2144        edges::resolve_all_edges(self.storage.connection())
2145    }
2146
2147    fn rebuild_logical_symbols(&self) -> anyhow::Result<()> {
2148        self.storage.connection().execute_batch(
2149            "
2150            CREATE TEMP TABLE IF NOT EXISTS logical_symbols_to_rebuild(id INTEGER PRIMARY KEY);
2151            DELETE FROM temp.logical_symbols_to_rebuild;
2152            INSERT OR IGNORE INTO temp.logical_symbols_to_rebuild(id)
2153            SELECT logical_symbol_members.logical_symbol_id
2154            FROM main.logical_symbol_members
2155            JOIN main.symbols ON symbols.id = logical_symbol_members.symbol_id
2156            JOIN files ON files.id = symbols.file_id;
2157            DELETE FROM main.logical_symbol_members
2158            WHERE logical_symbol_id IN (
2159                SELECT id FROM temp.logical_symbols_to_rebuild
2160            );
2161            DELETE FROM main.logical_symbols
2162            WHERE id IN (
2163                SELECT id FROM temp.logical_symbols_to_rebuild
2164            );
2165            DELETE FROM temp.logical_symbols_to_rebuild;
2166            ",
2167        )?;
2168
2169        let mut stmt = self.storage.connection().prepare(
2170            "
2171            SELECT symbols.id, symbols.file_id, files.path, symbols.language, symbols.name,
2172                   symbols.qualified_name, symbols.kind, symbols.start_byte, symbols.end_byte,
2173                   symbols.signature,
2174                   COALESCE((
2175                     SELECT chunks.start_byte
2176                     FROM chunks
2177                     WHERE chunks.file_id = symbols.file_id
2178                       AND symbols.start_byte >= chunks.start_byte
2179                       AND symbols.start_byte < chunks.end_byte
2180                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2181                     LIMIT 1
2182                   ), symbols.start_byte) AS chunk_start_byte,
2183                   COALESCE((
2184                     SELECT chunks.start_line
2185                     FROM chunks
2186                     WHERE chunks.file_id = symbols.file_id
2187                       AND symbols.start_byte >= chunks.start_byte
2188                       AND symbols.start_byte < chunks.end_byte
2189                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2190                     LIMIT 1
2191                   ), 1) AS chunk_start_line,
2192                   COALESCE((
2193                     SELECT chunks.text
2194                     FROM chunks
2195                     WHERE chunks.file_id = symbols.file_id
2196                       AND symbols.start_byte >= chunks.start_byte
2197                       AND symbols.start_byte < chunks.end_byte
2198                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2199                     LIMIT 1
2200                   ), '') AS chunk_text
2201            FROM symbols
2202            JOIN files ON files.id = symbols.file_id
2203            ORDER BY files.path, symbols.language, symbols.qualified_name, symbols.kind,
2204                     symbols.start_byte, symbols.end_byte
2205            ",
2206        )?;
2207        let rows = stmt.query_map([], |row| {
2208            let start_byte = usize::try_from(row.get::<_, i64>(7)?).unwrap_or(0);
2209            let end_byte = usize::try_from(row.get::<_, i64>(8)?).unwrap_or(0);
2210            let chunk_start_byte = usize::try_from(row.get::<_, i64>(10)?).unwrap_or(start_byte);
2211            let chunk_start_line = row.get::<_, i64>(11)?;
2212            let chunk_text: String = row.get(12)?;
2213            let start_line =
2214                symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, start_byte);
2215            let end_line =
2216                symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, end_byte);
2217            Ok(LogicalSymbolMemberRow {
2218                symbol_id: row.get(0)?,
2219                path: row.get(2)?,
2220                language: row.get(3)?,
2221                name: row.get(4)?,
2222                qualified_name: row.get(5)?,
2223                kind: row.get(6)?,
2224                signature: row.get(9)?,
2225                start_line,
2226                end_line,
2227            })
2228        })?;
2229        let mut groups: BTreeMap<LogicalSymbolKey, Vec<LogicalSymbolMemberRow>> = BTreeMap::new();
2230        for row in rows {
2231            let row = row?;
2232            groups.entry(LogicalSymbolKey::from(&row)).or_default().push(row);
2233        }
2234        for (key, members) in groups {
2235            let group_reason = if members.len() > 1 { "cfg_variant" } else { "single" };
2236            self.storage.connection().execute(
2237                "
2238                INSERT INTO logical_symbols(language, path, logical_name, qualified_name, kind, variant_count, group_reason)
2239                VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)
2240                ",
2241                params![
2242                    key.language,
2243                    key.path,
2244                    key.name,
2245                    key.qualified_name,
2246                    key.kind,
2247                    i64::try_from(members.len()).unwrap_or(i64::MAX),
2248                    group_reason,
2249                ],
2250            )?;
2251            let logical_symbol_id = self.storage.connection().last_insert_rowid();
2252            for member in members {
2253                let signature_hash =
2254                    member.signature.as_deref().map(|signature| hex_sha256(signature.as_bytes()));
2255                self.storage.connection().execute(
2256                    "
2257                    INSERT INTO logical_symbol_members(
2258                        logical_symbol_id, symbol_id, cfg_expr, signature_hash, start_line, end_line
2259                    )
2260                    VALUES (?1, ?2, NULL, ?3, ?4, ?5)
2261                    ",
2262                    params![
2263                        logical_symbol_id,
2264                        member.symbol_id,
2265                        signature_hash,
2266                        member.start_line,
2267                        member.end_line,
2268                    ],
2269                )?;
2270            }
2271        }
2272        Ok(())
2273    }
2274
2275    fn graph_coverage(
2276        &self,
2277        paths: BTreeSet<String>,
2278    ) -> anyhow::Result<crate::query::graph::GraphCoverage> {
2279        let indexed_files =
2280            self.storage
2281                .connection()
2282                .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2283        let parser_failure_paths = self.parser_failure_paths()?;
2284        let parser_failures = u64::try_from(parser_failure_paths.len()).unwrap_or(0);
2285        let known_index_gaps = parser_failure_paths
2286            .iter()
2287            .map(|failure| {
2288                format!(
2289                    "{} parser failed for {}: {}",
2290                    failure.language, failure.path, failure.message
2291                )
2292            })
2293            .collect::<Vec<_>>();
2294        let mut stale_files = 0_u64;
2295        let mut parser_coverage_for_paths = Vec::new();
2296        for path in paths {
2297            let Some(row) = self.graph_path_row(&path)? else {
2298                parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2299                    path,
2300                    language: "unknown".to_string(),
2301                    parser_status: "missing_from_index".to_string(),
2302                    graph_status: "missing_from_index".to_string(),
2303                    last_indexed_revision: None,
2304                });
2305                continue;
2306            };
2307            let stale = self.source_path_is_stale(&path, &row.sha256);
2308            if stale {
2309                stale_files += 1;
2310            }
2311            let parser_failed = parser_failure_paths.iter().any(|failure| failure.path == path);
2312            parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2313                path,
2314                language: row.language,
2315                parser_status: if parser_failed { "failed" } else { "ok" }.to_string(),
2316                graph_status: if stale {
2317                    "stale_source"
2318                } else if parser_failed {
2319                    "parser_failed"
2320                } else {
2321                    "ok"
2322                }
2323                .to_string(),
2324                last_indexed_revision: (!row.indexed_revision.is_empty())
2325                    .then_some(row.indexed_revision),
2326            });
2327        }
2328        Ok(crate::query::graph::GraphCoverage {
2329            indexed_files: u64::try_from(indexed_files).unwrap_or(0),
2330            parser_failures,
2331            stale_files,
2332            known_index_gaps,
2333            parser_coverage_for_paths,
2334        })
2335    }
2336
2337    fn graph_path_row(&self, path: &str) -> anyhow::Result<Option<GraphPathRow>> {
2338        self.storage
2339            .connection()
2340            .query_row(
2341                "SELECT language, sha256, indexed_revision FROM files WHERE path = ?1",
2342                [path],
2343                |row| {
2344                    Ok(GraphPathRow {
2345                        language: row.get(0)?,
2346                        sha256: row.get(1)?,
2347                        indexed_revision: row.get(2)?,
2348                    })
2349                },
2350            )
2351            .optional()
2352            .map_err(Into::into)
2353    }
2354
2355    fn source_path_is_stale(&self, path: &str, indexed_sha256: &str) -> bool {
2356        let Some(root) = self.storage.source_root() else {
2357            return false;
2358        };
2359        let Ok(bytes) = fs::read(root.join(path)) else {
2360            return true;
2361        };
2362        hex_sha256(&bytes) != indexed_sha256
2363    }
2364
2365    fn regex_hits(
2366        &self,
2367        pattern: &str,
2368        regex: &Regex,
2369        include_tests: bool,
2370    ) -> anyhow::Result<Vec<crate::query::graph::TextOnlyHit>> {
2371        let Some(root) = self.storage.source_root() else {
2372            anyhow::bail!("cannot compare graph to text: source_root is missing from index_meta");
2373        };
2374        let mut stmt = self.storage.connection().prepare("SELECT path FROM files ORDER BY path")?;
2375        let paths =
2376            stmt.query_map([], |row| row.get::<_, String>(0))?.collect::<Result<Vec<_>, _>>()?;
2377        let mut hits = Vec::new();
2378        for path in paths {
2379            if !include_tests && is_test_like_path(&path) {
2380                continue;
2381            }
2382            let full_path = root.join(&path);
2383            let Ok(text) = fs::read_to_string(&full_path) else {
2384                continue;
2385            };
2386            for (index, line) in text.lines().enumerate() {
2387                if regex.is_match(line) {
2388                    hits.push(crate::query::graph::TextOnlyHit {
2389                        path: path.clone(),
2390                        line: i64::try_from(index + 1).unwrap_or(i64::MAX),
2391                        text: line.trim().to_string(),
2392                        reason: "text pattern matched".to_string(),
2393                        likely_gap: pattern.to_string(),
2394                    });
2395                }
2396            }
2397        }
2398        Ok(hits)
2399    }
2400
2401    fn current_line_text(&self, path: &str, line: i64) -> anyhow::Result<Option<String>> {
2402        let Some(root) = self.storage.source_root() else {
2403            return Ok(None);
2404        };
2405        let Ok(text) = fs::read_to_string(root.join(path)) else {
2406            return Ok(None);
2407        };
2408        let Some(index) = usize::try_from(line.saturating_sub(1)).ok() else {
2409            return Ok(None);
2410        };
2411        Ok(text.lines().nth(index).map(|line| line.trim().to_string()))
2412    }
2413
2414    fn ensure_graph_index_current(&self) -> anyhow::Result<()> {
2415        if self.meta("graph_index_version")?.as_deref() == Some(GRAPH_INDEX_VERSION) {
2416            return Ok(());
2417        }
2418        let Some(root) = self.storage.source_root().map(Path::to_path_buf) else {
2419            return Ok(());
2420        };
2421        self.storage.execute_batch("BEGIN IMMEDIATE TRANSACTION")?;
2422        let result = (|| -> anyhow::Result<()> {
2423            self.storage.connection().execute("DELETE FROM edges", [])?;
2424            let files = self.graph_reindex_files()?;
2425            for file in files {
2426                if file.kind == TargetKind::Generated || file.language == Language::Markdown {
2427                    continue;
2428                }
2429                let full_path = root.join(&file.path);
2430                let Ok(text) = fs::read_to_string(full_path) else {
2431                    continue;
2432                };
2433                if text.len() > edges::MAX_GRAPH_PARSE_BYTES {
2434                    continue;
2435                }
2436                edges::index_file_edges(
2437                    self.storage.connection(),
2438                    file.id,
2439                    Path::new(&file.path),
2440                    file.language,
2441                    &text,
2442                )?;
2443            }
2444            self.resolve_edges()?;
2445            self.mark_graph_index_current()?;
2446            Ok(())
2447        })();
2448        if result.is_err() {
2449            let _ = self.storage.execute_batch("ROLLBACK");
2450        }
2451        result?;
2452        self.storage.execute_batch("COMMIT")?;
2453        Ok(())
2454    }
2455
2456    fn mark_graph_index_current(&self) -> anyhow::Result<()> {
2457        self.set_meta("graph_index_version", GRAPH_INDEX_VERSION)
2458    }
2459
2460    fn set_meta(&self, key: &str, value: &str) -> anyhow::Result<()> {
2461        self.storage.connection().execute(
2462            "INSERT INTO index_meta(key, value) VALUES (?1, ?2)
2463             ON CONFLICT(key) DO UPDATE SET value = excluded.value",
2464            params![key, value],
2465        )?;
2466        Ok(())
2467    }
2468
2469    fn meta(&self, key: &str) -> anyhow::Result<Option<String>> {
2470        meta_for(self.storage.connection(), key)
2471    }
2472
2473    fn insert_parser_failure(
2474        &self,
2475        path: &Path,
2476        language: Language,
2477        message: &str,
2478    ) -> anyhow::Result<()> {
2479        self.storage.connection().execute(
2480            "INSERT INTO parser_failures(path, language, message) VALUES (?1, ?2, ?3)",
2481            params![path_string(path), language.as_str(), message],
2482        )?;
2483        Ok(())
2484    }
2485
2486    fn parser_failure_count(&self) -> anyhow::Result<u64> {
2487        let count = self.storage.connection().query_row(
2488            "SELECT COUNT(*) FROM parser_failures",
2489            [],
2490            |row| row.get::<_, i64>(0),
2491        )?;
2492        Ok(u64::try_from(count).unwrap_or(0))
2493    }
2494
2495    fn parser_failure_paths(&self) -> anyhow::Result<Vec<ParserFailure>> {
2496        let mut stmt = self.storage.connection().prepare(
2497            "SELECT path, language, message FROM parser_failures ORDER BY path, language, message",
2498        )?;
2499        let rows = stmt.query_map([], |row| {
2500            Ok(ParserFailure { path: row.get(0)?, language: row.get(1)?, message: row.get(2)? })
2501        })?;
2502        let mut failures = Vec::new();
2503        for row in rows {
2504            failures.push(row?);
2505        }
2506        Ok(failures)
2507    }
2508
2509    fn search_with_heal(
2510        &self,
2511        query: &str,
2512        limit: u32,
2513        include_generated: bool,
2514        allow_heal: bool,
2515        explain: bool,
2516        options: SearchOptions,
2517    ) -> anyhow::Result<Vec<SearchHit>> {
2518        let hits = crate::search::lexical::search_with_options(
2519            self.storage.connection(),
2520            query,
2521            limit,
2522            include_generated,
2523            explain,
2524            options,
2525        )?;
2526        if !allow_heal {
2527            return Ok(hits);
2528        }
2529        let stale = self.stale_hit_paths(&hits)?;
2530        if stale.is_empty() {
2531            return Ok(hits);
2532        }
2533        if stale.len() > MAX_AUTO_HEAL_FILES_PER_CALL {
2534            anyhow::bail!(IndexError::NeedsReindex {
2535                stale_files: stale.len(),
2536                cap: MAX_AUTO_HEAL_FILES_PER_CALL,
2537            });
2538        }
2539        for path in stale {
2540            self.heal_file(Path::new(&path))?;
2541        }
2542        self.sync_fts()?;
2543        self.search_with_heal(query, limit, include_generated, false, explain, options)
2544    }
2545
2546    fn stale_hit_paths(&self, hits: &[SearchHit]) -> anyhow::Result<Vec<String>> {
2547        let Some(root) = self.storage.source_root() else {
2548            return Ok(Vec::new());
2549        };
2550        let mut stale = Vec::new();
2551        let mut seen = BTreeSet::new();
2552        for hit in hits {
2553            if !seen.insert(hit.path.clone()) {
2554                continue;
2555            }
2556            let source_path = root.join(&hit.path);
2557            let Ok(text) = fs::read_to_string(source_path) else {
2558                stale.push(hit.path.clone());
2559                continue;
2560            };
2561            let chunk = crate::query::read_chunk(self.storage.connection(), hit.chunk_id)?;
2562            let Some(chunk) = chunk else {
2563                stale.push(hit.path.clone());
2564                continue;
2565            };
2566            let anchor = self.chunk_anchor(hit.chunk_id)?;
2567            let status = anchors::validate(
2568                &chunk.text,
2569                usize::try_from(chunk.start_line).unwrap_or(1),
2570                usize::try_from(chunk.end_line).unwrap_or(1),
2571                &anchor,
2572                &text,
2573            );
2574            if !matches!(status, AnchorStatus::Exact) {
2575                stale.push(hit.path.clone());
2576            }
2577        }
2578        Ok(stale)
2579    }
2580
2581    fn chunk_anchor(&self, chunk_id: i64) -> anyhow::Result<ChunkAnchor> {
2582        Ok(self.storage.connection().query_row(
2583            "
2584            SELECT anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2585                   start_context_hash, end_context_hash, context_radius
2586            FROM chunks WHERE id = ?1
2587            ",
2588            [chunk_id],
2589            |row| {
2590                Ok(ChunkAnchor {
2591                    version: row.get(0)?,
2592                    normalized_hash: row.get(1)?,
2593                    start_boundary_hash: row.get(2)?,
2594                    end_boundary_hash: row.get(3)?,
2595                    start_context_hash: row.get(4)?,
2596                    end_context_hash: row.get(5)?,
2597                    context_radius: row.get(6)?,
2598                })
2599            },
2600        )?)
2601    }
2602
2603    fn mark_file_deleted(&self, path: &Path) -> anyhow::Result<()> {
2604        let path = path_string(path);
2605        self.remove_file_in_scope(Path::new(&path), "", &self.active_worktree_id)?;
2606        self.storage.connection().execute(
2607            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2608             VALUES (?1, 'unknown', 'deleted', '', 0, 0, ?2, '', '', ?3)
2609             ON CONFLICT(path, commit_sha, worktree_id) DO UPDATE SET
2610                kind = 'deleted',
2611                sha256 = '',
2612                modified_at_ms = 0,
2613                indexed_at_ms = excluded.indexed_at_ms",
2614            params![path, now_ms(), self.active_worktree_id],
2615        )?;
2616        self.mark_fts_dirty()?;
2617        Ok(())
2618    }
2619
    /// Removes everything indexed for `path` within one `(commit_sha, worktree_id)` scope.
    ///
    /// The statements run in dependency order: inbound edges are detached, outbound edges
    /// are deleted, then parser failures, FTS rows, chunks, symbols, and finally the `files`
    /// row itself. The FTS index is marked dirty afterwards.
    ///
    /// Note that the `parser_failures` delete matches on `path` alone and is therefore not
    /// limited to the given scope.
    ///
    /// # Errors
    /// Propagates the first failing SQL statement; this function opens no transaction of
    /// its own, so earlier statements are not undone here.
    fn remove_file_in_scope(
        &self,
        path: &Path,
        commit_sha: &str,
        worktree_id: &str,
    ) -> anyhow::Result<()> {
        let path = path_string(path);
        // Edges pointing INTO this file's symbols survive, but lose their resolved target
        // and are downgraded to name-only confidence.
        self.storage.connection().execute(
            "UPDATE edges
             SET to_symbol_id = NULL,
                 confidence = 'NameOnly'
             WHERE to_symbol_id IN (
                 SELECT symbols.id FROM symbols
                 JOIN main.files ON main.files.id = symbols.file_id
                 WHERE main.files.path = ?1
                   AND main.files.commit_sha = ?2
                   AND main.files.worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        // Edges that originate from this file, or from one of its symbols, are dropped.
        self.storage.connection().execute(
            "DELETE FROM edges
             WHERE source_file_id IN (
                    SELECT id FROM main.files
                    WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
                )
                OR from_symbol_id IN (
                    SELECT symbols.id FROM symbols
                    JOIN main.files ON main.files.id = symbols.file_id
                    WHERE main.files.path = ?1
                      AND main.files.commit_sha = ?2
                      AND main.files.worktree_id = ?3
                )",
            params![path, commit_sha, worktree_id],
        )?;
        // Keyed by path only: failures for this path are cleared regardless of scope.
        self.storage
            .connection()
            .execute("DELETE FROM parser_failures WHERE path = ?1", [&path])?;
        // FTS rows are removed before the chunks they are keyed on (rowid = chunks.id).
        self.storage.connection().execute(
            "DELETE FROM chunk_fts
             WHERE rowid IN (
                 SELECT chunks.id FROM chunks
                 JOIN main.files ON main.files.id = chunks.file_id
                 WHERE main.files.path = ?1
                   AND main.files.commit_sha = ?2
                   AND main.files.worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        self.storage.connection().execute(
            "DELETE FROM chunks
             WHERE file_id IN (
                SELECT id FROM main.files
                WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        self.storage.connection().execute(
            "DELETE FROM symbols
             WHERE file_id IN (
                SELECT id FROM main.files
                WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        // The files row goes last because every sub-select above resolves ids through it.
        self.storage.connection().execute(
            "DELETE FROM main.files WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3",
            params![path, commit_sha, worktree_id],
        )?;
        self.mark_fts_dirty()?;
        Ok(())
    }
2692
2693    fn ensure_fts_fresh(&self) -> anyhow::Result<()> {
2694        let content_revision = self.content_revision()?;
2695        let fts_source_revision = self.meta("fts_source_revision")?;
2696        if !self.fts_dirty()? && fts_source_revision.as_deref() == Some(content_revision.as_str()) {
2697            return Ok(());
2698        }
2699        self.rebuild_fts()?;
2700        let refreshed_revision = self.meta("fts_source_revision")?;
2701        if refreshed_revision.as_deref() != Some(content_revision.as_str()) {
2702            anyhow::bail!(
2703                "FTS freshness invariant failed: content_revision={content_revision}, fts_source_revision={}",
2704                refreshed_revision.unwrap_or_else(|| "<missing>".to_string())
2705            );
2706        }
2707        Ok(())
2708    }
2709
2710    fn fts_dirty(&self) -> anyhow::Result<bool> {
2711        Ok(self.meta("fts_dirty")?.as_deref() == Some("true"))
2712    }
2713
2714    fn file_row(&self, path: &Path) -> anyhow::Result<FileRow> {
2715        self.storage
2716            .connection()
2717            .query_row(
2718                "SELECT language, kind FROM files WHERE path = ?1",
2719                [path_string(path)],
2720                |row| {
2721                    let language: String = row.get(0)?;
2722                    let kind: String = row.get(1)?;
2723                    Ok((language, kind))
2724                },
2725            )
2726            .map_err(Into::into)
2727            .and_then(|(language, kind)| {
2728                Ok(FileRow { language: language.parse()?, kind: kind.parse()? })
2729            })
2730    }
2731
2732    fn graph_reindex_files(&self) -> anyhow::Result<Vec<GraphReindexFile>> {
2733        let mut stmt = self
2734            .storage
2735            .connection()
2736            .prepare("SELECT id, path, language, kind FROM files ORDER BY path")?;
2737        let rows = stmt.query_map([], |row| {
2738            let language: String = row.get(2)?;
2739            let kind: String = row.get(3)?;
2740            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?, language, kind))
2741        })?;
2742        let mut files = Vec::new();
2743        for row in rows {
2744            let (id, path, language, kind) = row?;
2745            files.push(GraphReindexFile {
2746                id,
2747                path,
2748                language: language.parse()?,
2749                kind: kind.parse()?,
2750            });
2751        }
2752        Ok(files)
2753    }
2754
2755    fn indexed_files(&self) -> anyhow::Result<Vec<IndexedFile>> {
2756        let mut stmt =
2757            self.storage.connection().prepare("SELECT path, sha256 FROM files ORDER BY path")?;
2758        let rows =
2759            stmt.query_map([], |row| Ok(IndexedFile { path: row.get(0)?, sha256: row.get(1)? }))?;
2760        let mut files = Vec::new();
2761        for row in rows {
2762            files.push(row?);
2763        }
2764        Ok(files)
2765    }
2766
2767    fn indexed_file_count(&self) -> anyhow::Result<usize> {
2768        let count =
2769            self.storage
2770                .connection()
2771                .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2772        Ok(usize::try_from(count).unwrap_or(usize::MAX))
2773    }
2774
2775    fn content_revision(&self) -> anyhow::Result<String> {
2776        let value = self.storage.connection().query_row(
2777            "SELECT COALESCE(string_agg(path || ':' || sha256, ',' ORDER BY path), '') FROM files",
2778            [],
2779            |row| row.get::<_, String>(0),
2780        )?;
2781        Ok(hex_sha256(value.as_bytes()))
2782    }
2783}
2784
/// Parsed `language` and `kind` columns of a single `files` row, as produced by
/// `IndexDatabase::file_row`.
#[derive(Debug)]
struct FileRow {
    /// Language parsed from the row's `language` column.
    language: Language,
    /// Target kind parsed from the row's `kind` column.
    kind: TargetKind,
}
2790
/// One `files` row as loaded by `graph_reindex_files` for rebuilding graph edges.
#[derive(Debug)]
struct GraphReindexFile {
    /// Value of the row's `id` column; passed to `edges::index_file_edges`.
    id: i64,
    /// Value of the row's `path` column; joined onto the source root to read the file.
    path: String,
    /// Language parsed from the row's `language` column.
    language: Language,
    /// Target kind parsed from the row's `kind` column.
    kind: TargetKind,
}
2798
/// Unparsed string columns of a `files` row, as returned by `graph_path_row`.
#[derive(Debug)]
struct GraphPathRow {
    /// Raw `language` column value.
    language: String,
    /// Raw `sha256` column value.
    sha256: String,
    /// Raw `indexed_revision` column value.
    indexed_revision: String,
}
2805
2806fn rank_docs_for_symbol(symbol: &crate::query::symbol::SymbolHit, hits: &mut [SearchHit]) {
2807    let source_module = module_stem(&symbol.path);
2808    let symbol_name = symbol.name.to_ascii_lowercase();
2809    let qualified_name = symbol.qualified_name.to_ascii_lowercase();
2810    hits.sort_by(|a, b| {
2811        let a_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, a);
2812        let b_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, b);
2813        a_rank
2814            .cmp(&b_rank)
2815            .then_with(|| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal))
2816            .then_with(|| a.path.cmp(&b.path))
2817            .then_with(|| a.start_line.cmp(&b.start_line))
2818    });
2819    for (idx, hit) in hits.iter_mut().enumerate() {
2820        hit.score = (10_000usize.saturating_sub(idx)) as f64;
2821    }
2822}
2823
2824fn docs_locality_rank(
2825    symbol: &crate::query::symbol::SymbolHit,
2826    source_module: &str,
2827    symbol_name: &str,
2828    qualified_name: &str,
2829    hit: &SearchHit,
2830) -> u8 {
2831    let path = hit.path.to_ascii_lowercase();
2832    let summary = hit.summary.to_ascii_lowercase();
2833    let hit_symbol = hit.symbol_path.as_deref().unwrap_or_default().to_ascii_lowercase();
2834    if hit.path == symbol.path && hit_symbol == symbol.qualified_name.to_ascii_lowercase() {
2835        return 0;
2836    }
2837    if hit.path == symbol.path {
2838        return 1;
2839    }
2840    if !source_module.is_empty()
2841        && path.contains(source_module)
2842        && (summary.contains(symbol_name) || hit_symbol.contains(symbol_name))
2843    {
2844        return 2;
2845    }
2846    if summary.contains(qualified_name) || hit_symbol.contains(qualified_name) {
2847        return 3;
2848    }
2849    if summary.contains(symbol_name) || hit_symbol.contains(symbol_name) {
2850        return 4;
2851    }
2852    if !source_module.is_empty() && path.contains(source_module) {
2853        return 5;
2854    }
2855    9
2856}
2857
/// Returns the ASCII-lowercased file stem of `path`, or an empty string when the path has
/// no stem or the stem is not valid UTF-8.
fn module_stem(path: &str) -> String {
    match Path::new(path).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_ascii_lowercase(),
        None => String::new(),
    }
}
2865
2866fn dedupe_search_hits(hits: &mut Vec<SearchHit>) {
2867    let mut seen = BTreeSet::new();
2868    hits.retain(|hit| seen.insert(hit.chunk_id));
2869}
2870
/// Collapses every run of whitespace in `text` to a single space (dropping leading and
/// trailing whitespace) and keeps at most the first 240 characters.
fn bounded_summary(text: &str) -> String {
    let mut collapsed = String::new();
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.chars().take(240).collect()
}
2874
2875#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
2876struct LogicalSymbolKey {
2877    language: String,
2878    path: String,
2879    name: String,
2880    qualified_name: String,
2881    kind: String,
2882}
2883
2884impl LogicalSymbolKey {
2885    fn from(row: &LogicalSymbolMemberRow) -> Self {
2886        Self {
2887            language: row.language.clone(),
2888            path: row.path.clone(),
2889            name: row.name.clone(),
2890            qualified_name: row.qualified_name.clone(),
2891            kind: row.kind.clone(),
2892        }
2893    }
2894}
2895
/// One symbol occurrence together with the fields `LogicalSymbolKey::from` uses to group
/// it under a logical symbol.
// NOTE(review): the query that populates this row is outside this chunk — confirm column
// semantics against it.
#[derive(Debug, Clone)]
struct LogicalSymbolMemberRow {
    /// Identifier of the symbol (presumably `symbols.id`; verify against the loader).
    symbol_id: i64,
    /// Path of the file containing the symbol; part of the logical key.
    path: String,
    /// Language name as a string; part of the logical key.
    language: String,
    /// Symbol name; part of the logical key.
    name: String,
    /// Qualified symbol name; part of the logical key.
    qualified_name: String,
    /// Symbol kind as a string; part of the logical key.
    kind: String,
    /// Optional signature text; not part of the logical key.
    signature: Option<String>,
    /// Start line of the symbol; not part of the logical key.
    start_line: i64,
    /// End line of the symbol; not part of the logical key.
    end_line: i64,
}
2908
/// Maps an absolute byte offset to a line number within a chunk.
///
/// * `text` - the chunk's text.
/// * `chunk_start_byte` - absolute byte offset at which `text` begins.
/// * `chunk_start_line` - line number of the chunk's first line.
/// * `byte` - absolute byte offset to locate.
///
/// Offsets at or before the chunk start map to `chunk_start_line` (clamped to at least 1).
/// Otherwise the result is `chunk_start_line` plus the number of newlines preceding the
/// offset, with the offset clamped to the end of `text`.
///
/// Newlines are counted over raw bytes, so an offset that lands inside a multi-byte UTF-8
/// character is handled without panicking (slicing the `str` there would panic).
fn symbol_line_for_byte(
    text: &str,
    chunk_start_byte: usize,
    chunk_start_line: i64,
    byte: usize,
) -> i64 {
    if byte <= chunk_start_byte {
        return chunk_start_line.max(1);
    }
    let local = byte.saturating_sub(chunk_start_byte).min(text.len());
    let newlines = text.as_bytes()[..local].iter().filter(|value| **value == b'\n').count();
    chunk_start_line.saturating_add(i64::try_from(newlines).unwrap_or(0))
}
2922
2923fn graph_only_reason(edge: &crate::query::graph::GraphHop, current_line: Option<&str>) -> String {
2924    let Some(line) = current_line else {
2925        return "missing_current_source_line".to_string();
2926    };
2927    if edge
2928        .target_qualified_name
2929        .as_deref()
2930        .is_some_and(|qualified| !qualified.is_empty() && line.contains(qualified))
2931    {
2932        return "qualified_call_pattern_mismatch".to_string();
2933    }
2934    if edge.target.as_deref().is_some_and(|target| !target.is_empty() && line.contains(target)) {
2935        return "imported_or_unqualified_call".to_string();
2936    }
2937    if edge
2938        .evidence
2939        .as_deref()
2940        .is_some_and(|evidence| !evidence.is_empty() && line.contains(evidence.trim()))
2941    {
2942        return "regex_too_narrow".to_string();
2943    }
2944    "stale_or_overbroad_graph_edge".to_string()
2945}
2946
2947fn is_likely_false_positive_graph_only(
2948    edge: &crate::query::graph::GraphHop,
2949    graph_only: &crate::query::graph::GraphOnlyEdge,
2950) -> bool {
2951    if graph_only.likely_reason == "stale_or_overbroad_graph_edge" {
2952        return true;
2953    }
2954    edge.resolution == "target_name_fallback"
2955        || edge.confidence == "NameOnly"
2956        || edge.confidence == "Ambiguous"
2957        || !edge.verified_target_symbol
2958}
2959
2960fn classify_text_only_hit(
2961    path: &str,
2962    text: &str,
2963    parser_failure_paths: &BTreeSet<String>,
2964) -> &'static str {
2965    if parser_failure_paths.contains(path) {
2966        return "parser_failure";
2967    }
2968    if is_generated_path(path) {
2969        return "generated_text_mention";
2970    }
2971    let trimmed = text.trim_start();
2972    if is_comment_like_text(trimmed) {
2973        return "comment_text_mention";
2974    }
2975    if is_import_or_declaration_text(trimmed) {
2976        return "declaration_text_mention";
2977    }
2978    if is_test_like_path(path) && is_test_scaffolding_text(trimmed) {
2979        return "test_scaffolding_text_mention";
2980    }
2981    "parser_call_extraction"
2982}
2983
/// Returns `true` for the classification labels that indicate a probable parser gap:
/// `"parser_call_extraction"` and `"parser_failure"`.
fn is_likely_parser_gap_kind(kind: &str) -> bool {
    kind == "parser_call_extraction" || kind == "parser_failure"
}
2987
/// Heuristically decides whether `path` points at generated code.
///
/// A path qualifies when any of its directories is named `generated` or `generated-web`,
/// or when it ends with `.d.ts` (which also covers `_bg.wasm.d.ts`).
///
/// Indexed paths are relative to the source root, so a top-level `generated/` directory
/// has no leading slash; it is matched explicitly via the prefix check.
fn is_generated_path(path: &str) -> bool {
    const GENERATED_DIRS: [&str; 2] = ["/generated/", "/generated-web/"];
    let in_generated_dir = GENERATED_DIRS
        .iter()
        .any(|dir| path.contains(dir) || path.starts_with(&dir[1..]));
    in_generated_dir || path.ends_with(".d.ts")
}
2994
/// Returns `true` when `text` begins with a comment-style marker: `//`, `/*`, `*`, `*/`,
/// or `#`. The caller is expected to have stripped leading whitespace.
fn is_comment_like_text(text: &str) -> bool {
    const COMMENT_PREFIXES: [&str; 5] = ["//", "/*", "*", "*/", "#"];
    COMMENT_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
3002
/// Returns `true` when `text` begins with an import or type-declaration keyword
/// (`import`, `export type`, `export interface`, `type`, `interface`, `declare`), each
/// followed by a space.
fn is_import_or_declaration_text(text: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 6] =
        ["import ", "export type ", "export interface ", "type ", "interface ", "declare "];
    DECLARATION_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
3011
/// Heuristically decides whether a line of test code is scaffolding (mocks, assertions,
/// suite/case declarations) rather than a real call site.
///
/// Two groups of markers are recognised:
/// * plain substrings: `.mock`, `jest.`, `jest<`, `toHaveBeen`
/// * call markers: `expect(`, `describe(`, `it(`, `test(` — these only count when the
///   character before them is not part of an identifier, so `submit(`, `emit(`, or
///   `latest(` are no longer mistaken for `it(` / `test(`.
fn is_test_scaffolding_text(text: &str) -> bool {
    const SUBSTRING_MARKERS: [&str; 4] = [".mock", "jest.", "jest<", "toHaveBeen"];
    const CALL_MARKERS: [&str; 4] = ["expect(", "describe(", "it(", "test("];

    /// True when `call` occurs in `text` at a position not preceded by an identifier
    /// character (alphanumeric, `_`, or `$`).
    fn has_standalone_call(text: &str, call: &str) -> bool {
        text.match_indices(call).any(|(start, _)| {
            text[..start]
                .chars()
                .next_back()
                .map_or(true, |prev| !(prev.is_alphanumeric() || prev == '_' || prev == '$'))
        })
    }

    SUBSTRING_MARKERS.iter().any(|marker| text.contains(*marker))
        || CALL_MARKERS.iter().any(|call| has_standalone_call(text, call))
}
3022
3023fn recommended_graph_text_fallback(
3024    parser_gaps: &[crate::query::graph::TextOnlyHit],
3025    graph_only_edges: &[crate::query::graph::GraphOnlyEdge],
3026) -> String {
3027    match (parser_gaps.is_empty(), graph_only_edges.is_empty()) {
3028        (false, false) => "both",
3029        (false, true) => "text",
3030        (true, false) => "graph",
3031        (true, true) => "none",
3032    }
3033    .to_string()
3034}
3035
/// Classifies how a comparison `pattern` relates to `symbol_name`.
///
/// * `"regex"` - the symbol name is empty, or the pattern does not mention it.
/// * `"identifier_or_call"` - the pattern contains a boundary-style token (`\b`, `\W`,
///   `[^`) or the symbol name followed by `(` / `\(`.
/// * `"substring_identifier"` - the pattern merely contains the symbol name.
fn compare_pattern_match_mode(pattern: &str, symbol_name: &str) -> String {
    let mode = if symbol_name.is_empty() {
        "regex"
    } else {
        let boundary_tokens = ["\\b", "\\W", "[^"];
        let call_forms = [format!("{symbol_name}\\("), format!("{symbol_name}(")];
        let anchored = boundary_tokens.iter().any(|token| pattern.contains(*token))
            || call_forms.iter().any(|call| pattern.contains(call.as_str()));
        if anchored {
            "identifier_or_call"
        } else if pattern.contains(symbol_name) {
            "substring_identifier"
        } else {
            "regex"
        }
    };
    mode.to_string()
}
3055
/// Heuristically decides whether `path` belongs to test code (ASCII case-insensitive).
///
/// A path qualifies when any of its directories is named `test`, `tests`, or `__tests__`,
/// or when the file name ends with `_test.rs`, `.test.ts`, `.test.tsx`, `.spec.ts`, or
/// `.spec.tsx`.
///
/// Indexed paths are relative to the source root, so a top-level `tests/` directory has
/// no leading slash; it is matched explicitly via the prefix check.
fn is_test_like_path(path: &str) -> bool {
    const TEST_DIRS: [&str; 3] = ["/test/", "/tests/", "/__tests__/"];
    const TEST_SUFFIXES: [&str; 5] =
        ["_test.rs", ".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx"];

    let lower = path.to_ascii_lowercase();
    let in_test_dir =
        TEST_DIRS.iter().any(|dir| lower.contains(dir) || lower.starts_with(&dir[1..]));
    in_test_dir || TEST_SUFFIXES.iter().any(|suffix| lower.ends_with(suffix))
}
3067
/// The `path` and `sha256` columns of one `files` row, as returned by `indexed_files`.
#[derive(Debug)]
struct IndexedFile {
    /// Value of the row's `path` column.
    path: String,
    /// Value of the row's `sha256` column.
    sha256: String,
}
3073
/// A file selected for indexing, together with the target settings that matched it.
#[derive(Debug, Clone)]
struct IndexFile {
    /// Path used to read the file from disk.
    full_path: PathBuf,
    /// Path relative to the configured root.
    relative_path: PathBuf,
    /// Language of the target that matched the file.
    language: Language,
    /// Kind of the target that matched the file.
    kind: TargetKind,
    /// Commit scope; the collectors in this module initialise it to an empty string.
    commit_sha: String,
    /// Worktree scope; the collectors in this module initialise it to an empty string.
    worktree_id: String,
}
3083
/// Scope under which a file is recorded: either a commit or a worktree.
///
/// Both constructors leave the unused side as an empty string.
#[derive(Debug, Clone)]
struct FileScope {
    commit_sha: String,
    worktree_id: String,
}

impl FileScope {
    /// Builds a commit scope: `commit_sha` is set and `worktree_id` is empty.
    fn commit(commit_sha: String) -> Self {
        let worktree_id = String::new();
        Self { commit_sha, worktree_id }
    }

    /// Builds a worktree scope: `worktree_id` is set and `commit_sha` is empty.
    fn worktree(worktree_id: String) -> Self {
        let commit_sha = String::new();
        Self { commit_sha, worktree_id }
    }
}
3099
/// A file paired with the outcome of preparing its content, as built by
/// `prepare_index_file`.
#[derive(Debug)]
struct PreparedIndexFile {
    /// Clone of the file description that was prepared.
    file: IndexFile,
    /// Result of `prepare_index_content` for that file; the error is kept rather than
    /// propagated so one failing file does not abort the batch.
    prepared: anyhow::Result<PreparedIndexContent>,
}
3105
/// Everything `prepare_index_content` derives from one file.
#[derive(Debug)]
struct PreparedIndexContent {
    /// Value returned by `file_metadata_ms` for the file's full path.
    modified_at_ms: i64,
    /// Full file contents as read by `fs::read_to_string`.
    text: String,
    /// `hex_sha256` of the bytes of `text`.
    sha256: String,
    /// Chunks produced by the chunker (generated files use the generated-file chunker).
    chunks: Vec<Chunk>,
    /// Extracted symbols; empty for generated files and for files larger than
    /// `chunker::MAX_STRUCTURAL_PARSE_BYTES`.
    symbols: Vec<Symbol>,
    /// Parser error message, if any; always `None` for Markdown, generated, and
    /// oversized files.
    parser_failure: Option<String>,
}
3115
/// Result of comparing discovered files against what is already indexed.
// NOTE(review): `discovery_plan`, which fills this struct, is only partly visible in this
// chunk; the field descriptions below are inferred from names and types — confirm there.
#[derive(Debug)]
struct DiscoveryPlan {
    /// Files selected for processing (presumably the ones to (re)index; verify).
    files: Vec<IndexFile>,
    /// Relative paths considered deleted (presumably indexed but no longer discovered).
    deleted: BTreeSet<PathBuf>,
    /// Discovered files with no index entry yet (presumably; verify).
    unindexed: Vec<IndexFile>,
    /// Relative paths whose content changed (presumably by hash comparison; verify).
    changed: Vec<PathBuf>,
    /// Number of discovered files.
    discovered_files: usize,
    /// Number of indexed files (presumably before applying the plan; verify).
    indexed_files: usize,
}
3125
/// Sets of repository-relative paths reported as changed or deleted.
// NOTE(review): the code that fills these sets is outside this chunk — presumably derived
// from git status; confirm.
#[derive(Debug, Default)]
struct GitChangedPaths {
    /// Paths to re-examine; `collect_changed_index_files` joins each onto the config root.
    changed: BTreeSet<PathBuf>,
    /// Paths reported as deleted.
    deleted: BTreeSet<PathBuf>,
}
3131
3132fn collect_index_files(config: &Config) -> anyhow::Result<Vec<IndexFile>> {
3133    let mut targets = config.targets.iter().collect::<Vec<_>>();
3134    targets.sort_by_key(|target| match target.kind {
3135        TargetKind::Generated => 0,
3136        TargetKind::Tests => 1,
3137        TargetKind::Docs => 2,
3138        TargetKind::Source => 3,
3139    });
3140    let mut seen = BTreeSet::new();
3141    let mut files = Vec::new();
3142
3143    for target in targets {
3144        for file in walker::walk_target(&config.root, target)? {
3145            let relative_path = file.strip_prefix(&config.root)?.to_path_buf();
3146            if !seen.insert(relative_path.clone()) {
3147                continue;
3148            }
3149            files.push(IndexFile {
3150                full_path: file,
3151                relative_path,
3152                language: target.language,
3153                kind: target.kind,
3154                commit_sha: String::new(),
3155                worktree_id: String::new(),
3156            });
3157        }
3158    }
3159
3160    Ok(files)
3161}
3162
3163fn collect_changed_index_files(
3164    config: &Config,
3165    changes: &GitChangedPaths,
3166) -> anyhow::Result<Vec<IndexFile>> {
3167    let mut files = Vec::new();
3168    for relative_path in &changes.changed {
3169        let full_path = config.root.join(relative_path);
3170        if !full_path.is_file() {
3171            continue;
3172        }
3173        let Some((language, kind)) = target_for_path(config, relative_path) else {
3174            continue;
3175        };
3176        files.push(IndexFile {
3177            full_path,
3178            relative_path: relative_path.clone(),
3179            language,
3180            kind,
3181            commit_sha: String::new(),
3182            worktree_id: String::new(),
3183        });
3184    }
3185    Ok(files)
3186}
3187
3188fn spawn_git_history_prepare(
3189    root: &Path,
3190) -> JoinHandle<anyhow::Result<git_history::PreparedGitHistory>> {
3191    let root = root.to_path_buf();
3192    thread::spawn(move || git_history::prepare(&root))
3193}
3194
3195fn join_git_history_prepare(
3196    handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
3197) -> anyhow::Result<git_history::PreparedGitHistory> {
3198    handle.join().map_err(|_| anyhow::anyhow!("git history preparation panicked"))?
3199}
3200
3201fn prepare_index_file(file: &IndexFile) -> PreparedIndexFile {
3202    PreparedIndexFile { file: file.clone(), prepared: prepare_index_content(file) }
3203}
3204
/// Prepares all `files` in parallel while reporting progress on the calling thread.
///
/// A scoped worker thread drives a rayon parallel iterator over `files`; selected
/// completions are sent over a channel and turned into `IndexProgress::PreparingFile`
/// events by the caller's thread, so `progress` itself is never invoked from a worker.
///
/// # Errors
/// Returns an error when the worker thread panics. Per-file failures are not errors here;
/// they are carried inside each `PreparedIndexFile`.
fn prepare_files_with_progress<F>(
    files: &[IndexFile],
    progress: &mut F,
) -> anyhow::Result<Vec<PreparedIndexFile>>
where
    F: FnMut(IndexProgress),
{
    /// Progress message passed from the workers to the reporting loop.
    #[derive(Debug)]
    struct PreparedProgress {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    }

    let total = files.len();
    let prepared = thread::scope(|scope| {
        let (tx, rx) = mpsc::channel();
        // Shared completion counter; `current` below is the 1-based completion order.
        let completed = AtomicUsize::new(0);
        // `move` transfers the sender and the counter into the worker closure, so the
        // sender is dropped when that closure finishes.
        let handle = scope.spawn(move || {
            files
                .par_iter()
                .map(|file| {
                    let prepared = prepare_index_file(file);
                    let current = completed.fetch_add(1, Ordering::Relaxed) + 1;
                    if should_report_file_progress(current, total) {
                        // A failed send is ignored: progress is best-effort.
                        let _ = tx.send(PreparedProgress {
                            current,
                            total,
                            path: file.relative_path.clone(),
                            language: file.language,
                            kind: file.kind,
                        });
                    }
                    prepared
                })
                .collect::<Vec<_>>()
        });

        // Runs until the channel closes, i.e. until the worker has dropped its sender.
        for event in rx {
            progress(IndexProgress::PreparingFile {
                current: event.current,
                total: event.total,
                path: event.path,
                language: event.language,
                kind: event.kind,
            });
        }

        handle.join().map_err(|_| anyhow::anyhow!("parallel file preparation panicked"))
    })?;
    Ok(prepared)
}
3259
/// Decides whether the `current`-th completion out of `total` deserves a progress event.
///
/// Reports the first and last completions, plus every completion that crosses into a new
/// tenth of the total. Never reports when `total` is zero.
fn should_report_file_progress(current: usize, total: usize) -> bool {
    if total == 0 {
        return false;
    }
    if current == 1 || current == total {
        return true;
    }
    let tenth_reached = |count: usize| count.saturating_mul(10) / total;
    tenth_reached(current) != tenth_reached(current.saturating_sub(1))
}
3269
3270fn prepare_index_content(file: &IndexFile) -> anyhow::Result<PreparedIndexContent> {
3271    let text = fs::read_to_string(&file.full_path)?;
3272    let modified_at_ms = file_metadata_ms(&file.full_path)?;
3273    let sha256 = hex_sha256(text.as_bytes());
3274    let parser_failure =
3275        if file.language != Language::Markdown && file.kind != TargetKind::Generated {
3276            if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3277                None
3278            } else {
3279                parser::parse_error(&file.relative_path, file.language, &text)
3280                    .unwrap_or_else(|err| Some(err.to_string()))
3281            }
3282        } else {
3283            None
3284        };
3285    let chunks = if file.kind == TargetKind::Generated {
3286        chunker::generated_chunks_for_file(&file.relative_path, &text)
3287    } else {
3288        chunker::chunks_for_file(&file.relative_path, file.language, &text)
3289    };
3290    let symbols =
3291        if file.kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3292            Vec::new()
3293        } else {
3294            symbols::symbols_for_file(&file.relative_path, file.language, &text)
3295        };
3296    Ok(PreparedIndexContent { modified_at_ms, text, sha256, chunks, symbols, parser_failure })
3297}
3298
3299fn discovery_plan(conn: &rusqlite::Connection, config: &Config) -> anyhow::Result<DiscoveryPlan> {
3300    let discovered = collect_index_files(config)?;
3301    let mut indexed = indexed_file_map(conn)?;
3302    let mut current_paths = BTreeSet::new();
3303    let mut files = Vec::new();
3304    let mut unindexed = Vec::new();
3305    let mut changed = Vec::new();
3306    let discovered_files = discovered.len();
3307    let hashed = discovered
3308        .par_iter()
3309        .map(|file| -> anyhow::Result<(IndexFile, String)> {
3310            let text = fs::read(&file.full_path)?;
3311            Ok((file.clone(), hex_sha256(&text)))
3312        })
3313        .collect::<Vec<_>>();
3314
3315    for hashed_file in hashed {
3316        let (file, current_hash) = hashed_file?;
3317        let relative = path_string(&file.relative_path);
3318        current_paths.insert(file.relative_path.clone());
3319        let Some(indexed_hash) = indexed.remove(&relative) else {
3320            unindexed.push(file.clone());
3321            files.push(file);
3322            continue;
3323        };
3324        if current_hash != indexed_hash {
3325            changed.push(file.relative_path.clone());
3326            files.push(file);
3327        }
3328    }
3329
3330    let deleted = indexed
3331        .into_keys()
3332        .map(PathBuf::from)
3333        .filter(|path| !current_paths.contains(path))
3334        .collect::<BTreeSet<_>>();
3335
3336    Ok(DiscoveryPlan {
3337        discovered_files,
3338        indexed_files: current_paths
3339            .len()
3340            .saturating_add(deleted.len())
3341            .saturating_sub(unindexed.len()),
3342        files,
3343        deleted,
3344        unindexed,
3345        changed,
3346    })
3347}
3348
3349fn indexed_file_map(conn: &rusqlite::Connection) -> anyhow::Result<BTreeMap<String, String>> {
3350    let mut stmt = conn.prepare("SELECT path, sha256 FROM files ORDER BY path")?;
3351    let rows =
3352        stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?;
3353    let mut files = BTreeMap::new();
3354    for row in rows {
3355        let (path, sha256) = row?;
3356        files.insert(path, sha256);
3357    }
3358    Ok(files)
3359}
3360
3361fn target_for_path(config: &Config, relative_path: &Path) -> Option<(Language, TargetKind)> {
3362    let relative = path_string(relative_path);
3363    let language = Language::from_path(relative_path)?;
3364    let mut targets = config.targets.iter().collect::<Vec<_>>();
3365    targets.sort_by_key(|target| match target.kind {
3366        TargetKind::Generated => 0,
3367        TargetKind::Tests => 1,
3368        TargetKind::Docs => 2,
3369        TargetKind::Source => 3,
3370    });
3371    targets.into_iter().find_map(|target| {
3372        if target.language != language {
3373            return None;
3374        }
3375        if !target.directories.iter().any(|directory| {
3376            directory.as_os_str().is_empty()
3377                || directory == Path::new(".")
3378                || relative_path.starts_with(directory)
3379        }) {
3380            return None;
3381        }
3382        if target.exclude.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3383            return None;
3384        }
3385        if !target.include.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3386            return None;
3387        }
3388        Some((target.language, target.kind))
3389    })
3390}
3391
3392fn git_changed_paths(root: &Path) -> anyhow::Result<GitChangedPaths> {
3393    let repo = gix::discover(root)?;
3394    let worktree_root = repo
3395        .workdir()
3396        .ok_or_else(|| anyhow::anyhow!("git repository has no worktree"))?
3397        .to_path_buf();
3398    let pathspec = config_root_pathspec(&worktree_root, root);
3399    let mut paths = GitChangedPaths::default();
3400
3401    for item in repo
3402        .status(gix::progress::Discard)?
3403        .untracked_files(UntrackedFiles::Files)
3404        .tree_index_track_renames(tree_index::TrackRenames::Disabled)
3405        .into_iter([pathspec])?
3406    {
3407        let item = item?;
3408        let Some(path) = repo_relative_path_to_config_path(&worktree_root, root, item.location())
3409        else {
3410            continue;
3411        };
3412        if root.join(&path).exists() {
3413            if !paths.deleted.contains(&path) {
3414                paths.changed.insert(path);
3415            }
3416        } else {
3417            paths.changed.remove(&path);
3418            paths.deleted.insert(path);
3419        }
3420    }
3421
3422    Ok(paths)
3423}
3424
3425fn repo_relative_path_to_config_path(
3426    worktree_root: &Path,
3427    config_root: &Path,
3428    repo_relative_path: &gix::bstr::BStr,
3429) -> Option<PathBuf> {
3430    let path = PathBuf::from(repo_relative_path.to_str_lossy().as_ref());
3431    worktree_root.join(path).strip_prefix(config_root).ok().map(Path::to_path_buf)
3432}
3433
3434fn config_root_pathspec(worktree_root: &Path, config_root: &Path) -> BString {
3435    let relative = config_root.strip_prefix(worktree_root).unwrap_or_else(|_| Path::new(""));
3436    let relative = path_string(relative);
3437    if relative.is_empty() || relative == "." {
3438        BString::from("*")
3439    } else {
3440        BString::from(format!("{relative}/**"))
3441    }
3442}
3443
/// Tests `path` (forward-slash separated) against a deliberately small subset
/// of glob syntax.
///
/// Supported forms, checked in this order:
///
/// * `**/*.ext` – `path` ends with `.ext`.
/// * `dir/**` – `path` is `dir` itself or lies below it. The match respects
///   the path separator, so `src/**` matches `src/lib.rs` but not
///   `src_old/lib.rs`. A bare `/**` matches every path.
/// * anything else – `path` equals the pattern, or contains the pattern with
///   leading and trailing `*` removed.
fn matches_simple_pattern(path: &str, pattern: &str) -> bool {
    if let Some(extension) = pattern.strip_prefix("**/*.") {
        // Same as `path.ends_with(".{extension}")` without building a string.
        return path.strip_suffix(extension).is_some_and(|stem| stem.ends_with('.'));
    }
    if let Some(prefix) = pattern.strip_suffix("/**") {
        let Some(rest) = path.strip_prefix(prefix) else {
            return false;
        };
        // Only accept the prefix when it ends on a component boundary;
        // a plain string prefix would let `src/**` swallow `src_old/...`.
        return prefix.is_empty()
            || prefix.ends_with('/')
            || rest.is_empty()
            || rest.starts_with('/');
    }
    path == pattern || path.contains(pattern.trim_matches('*'))
}
3453
3454fn meta_for(conn: &rusqlite::Connection, key: &str) -> anyhow::Result<Option<String>> {
3455    Ok(conn
3456        .query_row("SELECT value FROM index_meta WHERE key = ?1", [key], |row| row.get(0))
3457        .optional()?)
3458}
3459
/// Runs `git` with `args` inside `root` and returns its trimmed stdout.
///
/// Returns `None` when the process cannot be started or exits unsuccessfully.
/// Output that is not valid UTF-8 is converted lossily.
fn git_output(root: &Path, args: &[&str]) -> Option<String> {
    let mut command = Command::new("git");
    command.args(args).current_dir(root);
    match command.output() {
        Ok(output) if output.status.success() => {
            let stdout = String::from_utf8_lossy(&output.stdout);
            Some(stdout.trim().to_string())
        }
        _ => None,
    }
}
3467
3468fn resolve_git_context(root: &Path) -> (String, String) {
3469    let commit_sha =
3470        git_output(root, &["rev-parse", "HEAD"]).map(|s| s.trim().to_string()).unwrap_or_default();
3471    let worktree_id = root.to_string_lossy().trim_end_matches('/').to_string();
3472    (commit_sha, worktree_id)
3473}
3474
3475fn file_metadata_ms(path: &Path) -> anyhow::Result<i64> {
3476    let modified = fs::metadata(path)?.modified()?;
3477    Ok(duration_ms(modified.duration_since(UNIX_EPOCH)?))
3478}
3479
3480fn now_ms() -> i64 {
3481    duration_ms(SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default())
3482}
3483
/// Converts `duration` to whole milliseconds, saturating at `i64::MAX`.
fn duration_ms(duration: std::time::Duration) -> i64 {
    match i64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => i64::MAX,
    }
}
3487
3488fn hex_sha256(bytes: &[u8]) -> String {
3489    let hash = Sha256::digest(bytes);
3490    let mut out = String::with_capacity(hash.len() * 2);
3491    for byte in hash {
3492        use std::fmt::Write as _;
3493        let _ = write!(out, "{byte:02x}");
3494    }
3495    out
3496}
3497
/// Renders `path` as text with every backslash replaced by a forward slash.
///
/// Non-UTF-8 components are converted lossily.
fn path_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.chars().map(|ch| if ch == '\\' { '/' } else { ch }).collect()
}
3501
3502#[cfg(test)]
3503mod schema_bootstrap_tests {
3504    use std::sync::atomic::{AtomicU64, Ordering};
3505
3506    use super::*;
3507    use crate::config::ResolvedTarget;
3508
3509    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
3510
3511    #[test]
3512    fn rebuild_bootstraps_sqlite_schema_for_empty_target_root() {
3513        let root = unique_temp_root();
3514        let _ = fs::remove_dir_all(&root);
3515        let docs = root.join("docs");
3516        fs::create_dir_all(&docs).unwrap();
3517
3518        let config = Config {
3519            root: root.clone(),
3520            database: root.join(".rag-rat/index.sqlite"),
3521            targets: vec![ResolvedTarget {
3522                name: "markdown".to_string(),
3523                language: Language::Markdown,
3524                directories: vec![PathBuf::from("docs")],
3525                include: vec!["**/*.md".to_string()],
3526                exclude: Vec::new(),
3527                kind: TargetKind::Docs,
3528            }],
3529            local_ai: Default::default(),
3530        };
3531
3532        let db = IndexDatabase::rebuild(&config).unwrap();
3533        assert!(config.database.exists());
3534        assert_eq!(table_count(&db, "files"), 1);
3535        assert_eq!(table_count(&db, "chunks"), 1);
3536        assert_eq!(table_count(&db, "symbols"), 1);
3537        assert_eq!(table_count(&db, "parser_failures"), 1);
3538        assert_eq!(table_count(&db, "index_meta"), 1);
3539        assert_eq!(table_count(&db, "chunk_fts"), 1);
3540        assert_eq!(table_count(&db, "git_commits"), 1);
3541        assert_eq!(table_count(&db, "git_file_changes"), 1);
3542        assert_eq!(table_count(&db, "git_chunk_blame"), 1);
3543        assert_eq!(table_count(&db, "commit_fts"), 1);
3544        assert_eq!(table_count(&db, "ai_models"), 1);
3545        assert_eq!(table_count(&db, "chunk_embeddings"), 1);
3546        assert_eq!(table_count(&db, "chunk_summaries"), 1);
3547        assert_eq!(table_count(&db, "reconcile_meta"), 1);
3548        assert_eq!(table_count(&db, "reconcile_attempts"), 1);
3549        assert!(file_columns(&db).contains(&"indexed_revision".to_string()));
3550        assert_eq!(indexed_revision_count(&db), 0);
3551        assert!(chunk_columns(&db).contains(&"anchor_version".to_string()));
3552        assert!(chunk_columns(&db).contains(&"normalized_hash".to_string()));
3553        assert!(chunk_columns(&db).contains(&"start_boundary_hash".to_string()));
3554        assert!(chunk_columns(&db).contains(&"end_boundary_hash".to_string()));
3555        assert!(chunk_columns(&db).contains(&"source_revision".to_string()));
3556        let embedding_columns = table_columns(&db, "chunk_embeddings");
3557        assert!(embedding_columns.contains(&"model_version".to_string()));
3558        assert!(embedding_columns.contains(&"input_hash".to_string()));
3559        assert!(embedding_columns.contains(&"embedding_text_version".to_string()));
3560        assert!(embedding_columns.contains(&"embedding_policy".to_string()));
3561        assert!(embedding_columns.contains(&"embedding_priority".to_string()));
3562        assert!(embedding_columns.contains(&"input_chars".to_string()));
3563        assert!(embedding_columns.contains(&"input_truncated".to_string()));
3564        assert!(embedding_columns.contains(&"attempt_count".to_string()));
3565        assert!(embedding_columns.contains(&"next_retry_after_ms".to_string()));
3566        assert!(embedding_columns.contains(&"computed_at_ms".to_string()));
3567        let edge_columns = table_columns(&db, "edges");
3568        assert!(edge_columns.contains(&"source_start_line".to_string()));
3569        assert!(edge_columns.contains(&"source_end_line".to_string()));
3570        assert!(edge_columns.contains(&"source_start_byte".to_string()));
3571        assert!(edge_columns.contains(&"source_end_byte".to_string()));
3572        assert!(edge_columns.contains(&"target_start_line".to_string()));
3573        assert!(edge_columns.contains(&"target_end_line".to_string()));
3574        assert!(edge_columns.contains(&"target_qualified_name".to_string()));
3575        assert!(edge_columns.contains(&"evidence".to_string()));
3576        assert!(edge_columns.contains(&"receiver_hint".to_string()));
3577        assert!(edge_columns.contains(&"resolution".to_string()));
3578        let logical_columns = table_columns(&db, "logical_symbols");
3579        assert!(logical_columns.contains(&"qualified_name".to_string()));
3580        assert!(logical_columns.contains(&"variant_count".to_string()));
3581        let member_columns = table_columns(&db, "logical_symbol_members");
3582        assert!(member_columns.contains(&"symbol_id".to_string()));
3583        assert!(member_columns.contains(&"signature_hash".to_string()));
3584        let github_ref_sync_columns = table_columns(&db, "github_ref_sync");
3585        assert!(github_ref_sync_columns.contains(&"status".to_string()));
3586        assert!(github_ref_sync_columns.contains(&"last_error".to_string()));
3587        let symbol_fact_columns = table_columns(&db, "symbol_facts");
3588        assert!(symbol_fact_columns.contains(&"fact_kind".to_string()));
3589        assert!(symbol_fact_columns.contains(&"fact_value".to_string()));
3590        assert_eq!(
3591            db.status(&config.database).unwrap().schema.current_version,
3592            schema::LATEST_SCHEMA_VERSION
3593        );
3594
3595        fs::remove_dir_all(root).unwrap();
3596    }
3597
3598    #[test]
3599    fn rebuild_reports_file_preparation_progress() {
3600        let root = unique_temp_root();
3601        let _ = fs::remove_dir_all(&root);
3602        fs::create_dir_all(root.join("src")).unwrap();
3603        fs::write(root.join("src/lib.rs"), "pub fn exported() {}\n").unwrap();
3604
3605        let config = source_config(root.clone(), Language::Rust);
3606        let mut events = Vec::new();
3607        IndexDatabase::rebuild_with_progress(&config, |progress| events.push(progress)).unwrap();
3608
3609        assert!(
3610            events.iter().any(|event| matches!(event, IndexProgress::PreparingFile { .. })),
3611            "missing preparing progress event: {events:?}"
3612        );
3613        assert!(
3614            events.iter().any(|event| matches!(event, IndexProgress::IndexingFile { .. })),
3615            "missing indexing progress event: {events:?}"
3616        );
3617
3618        fs::remove_dir_all(root).unwrap();
3619    }
3620
3621    #[test]
3622    fn file_progress_reports_first_final_and_decile_boundaries() {
3623        let reported = (1..=100)
3624            .filter(|current| should_report_file_progress(*current, 100))
3625            .collect::<Vec<_>>();
3626        assert_eq!(reported, vec![1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]);
3627    }
3628
3629    #[test]
3630    fn compatible_open_requires_recorded_schema_version() {
3631        let root = unique_temp_root();
3632        let _ = fs::remove_dir_all(&root);
3633        fs::create_dir_all(root.join(".rag-rat")).unwrap();
3634        let database = root.join(".rag-rat/index.sqlite");
3635        IndexDatabase::migrate(&database).unwrap();
3636        let conn = rusqlite::Connection::open(&database).unwrap();
3637        conn.execute_batch("DROP TABLE schema_version;").unwrap();
3638        drop(conn);
3639
3640        let status = IndexDatabase::migration_check(&database).unwrap();
3641        assert_eq!(status.state, schema::SchemaState::Older);
3642        let err = IndexDatabase::open(&database).unwrap_err().to_string();
3643        assert!(err.contains("run `rag-rat migrate`"), "{err}");
3644
3645        let migrated = IndexDatabase::migrate(&database).unwrap();
3646        assert_eq!(migrated.state, schema::SchemaState::Compatible);
3647        IndexDatabase::open(&database).unwrap();
3648
3649        fs::remove_dir_all(root).unwrap();
3650    }
3651
3652    #[test]
3653    fn migrate_adds_edge_name_columns_before_indexing_them() {
3654        let root = unique_temp_root();
3655        let _ = fs::remove_dir_all(&root);
3656        fs::create_dir_all(root.join(".rag-rat")).unwrap();
3657        let database = root.join(".rag-rat/index.sqlite");
3658        let conn = rusqlite::Connection::open(&database).unwrap();
3659        conn.execute_batch(
3660            "
3661            CREATE TABLE files(
3662                id INTEGER PRIMARY KEY AUTOINCREMENT,
3663                path TEXT NOT NULL UNIQUE,
3664                language TEXT NOT NULL,
3665                kind TEXT NOT NULL,
3666                sha256 TEXT NOT NULL,
3667                modified_at_ms INTEGER NOT NULL,
3668                generated INTEGER NOT NULL DEFAULT 0,
3669                indexed_at_ms INTEGER NOT NULL
3670            );
3671            CREATE TABLE chunks(
3672                id INTEGER PRIMARY KEY AUTOINCREMENT,
3673                file_id INTEGER NOT NULL,
3674                chunk_kind TEXT NOT NULL,
3675                symbol_path TEXT,
3676                start_byte INTEGER NOT NULL,
3677                end_byte INTEGER NOT NULL,
3678                start_line INTEGER NOT NULL,
3679                end_line INTEGER NOT NULL,
3680                text TEXT NOT NULL,
3681                text_hash TEXT NOT NULL
3682            );
3683            CREATE TABLE symbols(
3684                id INTEGER PRIMARY KEY AUTOINCREMENT,
3685                file_id INTEGER NOT NULL,
3686                language TEXT NOT NULL,
3687                name TEXT NOT NULL,
3688                qualified_name TEXT NOT NULL,
3689                kind TEXT NOT NULL,
3690                start_byte INTEGER NOT NULL,
3691                end_byte INTEGER NOT NULL,
3692                signature TEXT,
3693                docs TEXT
3694            );
3695            CREATE TABLE edges(
3696                id INTEGER PRIMARY KEY AUTOINCREMENT,
3697                from_symbol_id INTEGER,
3698                to_symbol_id INTEGER,
3699                edge_kind TEXT NOT NULL,
3700                confidence TEXT NOT NULL
3701            );
3702            ",
3703        )
3704        .unwrap();
3705        drop(conn);
3706
3707        let migrated = IndexDatabase::migrate(&database).unwrap();
3708        assert_eq!(migrated.state, schema::SchemaState::Compatible);
3709        let db = IndexDatabase::open(&database).unwrap();
3710        let columns = table_columns(&db, "edges");
3711        assert!(columns.contains(&"from_name".to_string()));
3712        assert!(columns.contains(&"to_name".to_string()));
3713        assert!(columns.contains(&"source_start_line".to_string()));
3714        assert!(columns.contains(&"source_end_line".to_string()));
3715        assert!(columns.contains(&"source_start_byte".to_string()));
3716        assert!(columns.contains(&"source_end_byte".to_string()));
3717        assert!(columns.contains(&"target_start_line".to_string()));
3718        assert!(columns.contains(&"target_end_line".to_string()));
3719        assert_eq!(table_count(&db, "idx_edges_from_name"), 1);
3720        assert_eq!(table_count(&db, "idx_edges_to_name"), 1);
3721
3722        fs::remove_dir_all(root).unwrap();
3723    }
3724
3725    #[test]
3726    fn migrate_preserves_github_papertrail_cache() {
3727        let (root, config) =
3728            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
3729        let db = IndexDatabase::rebuild(&config).unwrap();
3730        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
3731            .unwrap();
3732        assert_eq!(row_count(&db, "github_refs"), 1);
3733        assert_eq!(row_count(&db, "github_issues"), 1);
3734        assert_eq!(row_count(&db, "github_comments"), 1);
3735        assert_eq!(row_count(&db, "github_pull_requests"), 1);
3736        assert_eq!(row_count(&db, "github_reviews"), 1);
3737        assert_eq!(row_count(&db, "github_review_comments"), 1);
3738        assert_eq!(row_count(&db, "github_fts"), 5);
3739        db.storage
3740            .connection()
3741            .execute("DELETE FROM schema_version WHERE id = ?1", ["010_symbol_facts"])
3742            .unwrap();
3743        drop(db);
3744
3745        let migrated = IndexDatabase::migrate(&config.database).unwrap();
3746        assert_eq!(migrated.state, schema::SchemaState::Compatible);
3747        let db = IndexDatabase::open(&config.database).unwrap();
3748        assert_eq!(row_count(&db, "github_refs"), 1);
3749        assert_eq!(row_count(&db, "github_issues"), 1);
3750        assert_eq!(row_count(&db, "github_comments"), 1);
3751        assert_eq!(row_count(&db, "github_pull_requests"), 1);
3752        assert_eq!(row_count(&db, "github_reviews"), 1);
3753        assert_eq!(row_count(&db, "github_review_comments"), 1);
3754        assert_eq!(row_count(&db, "github_fts"), 5);
3755        let hits = db.github_issue_search("sqlite", 10).unwrap();
3756        assert_eq!(hits.len(), 1);
3757        assert_eq!(hits[0].number, 42);
3758
3759        fs::remove_dir_all(root).unwrap();
3760    }
3761
3762    #[test]
3763    fn full_rebuild_preserves_github_papertrail_cache() {
3764        let (root, config) =
3765            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
3766        let db = IndexDatabase::rebuild(&config).unwrap();
3767        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
3768            .unwrap();
3769        assert_eq!(row_count(&db, "github_issues"), 1);
3770        assert_eq!(row_count(&db, "github_fts"), 5);
3771        drop(db);
3772
3773        let db = IndexDatabase::rebuild(&config).unwrap();
3774
3775        assert_eq!(row_count(&db, "github_refs"), 1);
3776        assert_eq!(row_count(&db, "github_issues"), 1);
3777        assert_eq!(row_count(&db, "github_comments"), 1);
3778        assert_eq!(row_count(&db, "github_pull_requests"), 1);
3779        assert_eq!(row_count(&db, "github_reviews"), 1);
3780        assert_eq!(row_count(&db, "github_review_comments"), 1);
3781        assert_eq!(row_count(&db, "github_ref_sync"), 1);
3782        assert_eq!(row_count(&db, "github_fts"), 5);
3783        let hits = db.github_issue_search("sqlite", 10).unwrap();
3784        assert_eq!(hits.len(), 1);
3785        assert_eq!(hits[0].number, 42);
3786
3787        fs::remove_dir_all(root).unwrap();
3788    }
3789
3790    #[test]
3791    fn full_rebuild_preserves_installed_model_manifest() {
3792        let (root, config) = markdown_config("alpha token with enough detail for embeddings\n");
3793        let db = IndexDatabase::rebuild(&config).unwrap();
3794        db.install_model(ai::HASH_MODEL_ID).unwrap();
3795        let before = db.local_ai_status().unwrap();
3796        assert_eq!(before.embedding.model_id, ai::HASH_MODEL_ID);
3797        assert!(before.embedding.installed);
3798        drop(db);
3799
3800        let db = IndexDatabase::rebuild(&config).unwrap();
3801
3802        let after = db.local_ai_status().unwrap();
3803        assert_eq!(after.embedding.model_id, ai::HASH_MODEL_ID);
3804        assert!(after.embedding.installed);
3805        assert_eq!(after.embedding.state, "Ready");
3806
3807        fs::remove_dir_all(root).unwrap();
3808    }
3809
    /// A full rebuild must leave rows that belong to a different worktree
    /// untouched.
    ///
    /// The test seeds a file, chunk, symbol and FTS row tagged with
    /// `other-worktree`, rebuilds, and then checks that each seeded row is
    /// still present exactly once.
    #[test]
    fn full_rebuild_preserves_other_worktree_contexts() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn current_context() {}\n").unwrap();
        let config = source_config(root.clone(), Language::Rust);
        let db = IndexDatabase::rebuild(&config).unwrap();
        // Seed a file row owned by another worktree and remember its id.
        let other_file_id = db
            .storage
            .connection()
            .query_row(
                "
                INSERT INTO main.files(
                    path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms,
                    indexed_revision, commit_sha, worktree_id
                )
                VALUES ('src/other.rs', 'rust', 'source', 'other-sha', 0, 0, 1, 'other-sha', '', 'other-worktree')
                RETURNING id
                ",
                [],
                |row| row.get::<_, i64>(0),
            )
            .unwrap();
        // Attach one chunk to the foreign file.
        let other_chunk_id = db
            .storage
            .connection()
            .query_row(
                "
                INSERT INTO main.chunks(
                    file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line,
                    text, text_hash, source_revision, anchor_version, normalized_hash,
                    start_boundary_hash, end_boundary_hash, start_context_hash, end_context_hash,
                    context_radius, embedding_policy, embedding_priority
                )
                VALUES (?1, 'symbol', 'other_context', 0, 12, 1, 1, 'other context', 'other-text',
                    'other-sha', 1, '', '', '', '', '', 2, 'Embed', 1)
                RETURNING id
                ",
                [other_file_id],
                |row| row.get::<_, i64>(0),
            )
            .unwrap();
        // Attach one symbol to the foreign file.
        db.storage
            .connection()
            .execute(
                "
                INSERT INTO main.symbols(
                    file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs
                )
                VALUES (?1, 'rust', 'other_context', 'other_context', 'function', 0, 12, NULL, NULL)
                ",
                [other_file_id],
            )
            .unwrap();
        // Add a full-text row keyed by the foreign chunk's id.
        db.storage
            .connection()
            .execute(
                "INSERT INTO main.chunk_fts(rowid, text) VALUES (?1, 'other context')",
                [other_chunk_id],
            )
            .unwrap();
        drop(db);

        let db = IndexDatabase::rebuild(&config).unwrap();

        // The foreign file row survived the rebuild.
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.files WHERE worktree_id = 'other-worktree'",
                    [],
                    |row| row.get::<_, i64>(0)
                )
                .unwrap(),
            1
        );
        // Its chunk survived.
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.chunks WHERE file_id = ?1",
                    [other_file_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );
        // Its symbol survived.
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.symbols WHERE file_id = ?1",
                    [other_file_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );
        // Its full-text row survived.
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.chunk_fts WHERE rowid = ?1",
                    [other_chunk_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );

        fs::remove_dir_all(root).unwrap();
    }
3923
3924    #[test]
3925    fn compatible_open_refuses_dirty_and_newer_schema() {
3926        let root = unique_temp_root();
3927        let _ = fs::remove_dir_all(&root);
3928        fs::create_dir_all(root.join(".rag-rat")).unwrap();
3929        let database = root.join(".rag-rat/index.sqlite");
3930        let conn = rusqlite::Connection::open(&database).unwrap();
3931        conn.execute_batch(
3932            "
3933            CREATE TABLE schema_version(
3934                id TEXT PRIMARY KEY,
3935                applied_at_ms INTEGER NOT NULL,
3936                checksum TEXT NOT NULL,
3937                description TEXT NOT NULL
3938            );
3939            INSERT INTO schema_version(id, applied_at_ms, checksum, description)
3940            VALUES ('__dirty__', 1, '', 'partial migration in progress');
3941            ",
3942        )
3943        .unwrap();
3944        drop(conn);
3945
3946        let dirty = IndexDatabase::migration_check(&database).unwrap();
3947        assert_eq!(dirty.state, schema::SchemaState::Dirty);
3948        let err = IndexDatabase::open(&database).unwrap_err().to_string();
3949        assert!(err.contains("dirty or partial"), "{err}");
3950
3951        let conn = rusqlite::Connection::open(&database).unwrap();
3952        conn.execute_batch(
3953            "
3954            DELETE FROM schema_version;
3955            INSERT INTO schema_version(id, applied_at_ms, checksum, description)
3956            VALUES ('999_future_schema', 1, 'sha256:future', 'future schema');
3957            ",
3958        )
3959        .unwrap();
3960        drop(conn);
3961        let newer = IndexDatabase::migration_check(&database).unwrap();
3962        assert_eq!(newer.state, schema::SchemaState::Newer);
3963        let err = IndexDatabase::open(&database).unwrap_err().to_string();
3964        assert!(err.contains("newer rag-rat"), "{err}");
3965
3966        fs::remove_dir_all(root).unwrap();
3967    }
3968
3969    #[test]
3970    fn discover_mode_indexes_new_files_and_removes_deleted_files() {
3971        let root = unique_temp_root();
3972        let _ = fs::remove_dir_all(&root);
3973        fs::create_dir_all(root.join("src")).unwrap();
3974        fs::write(root.join("src/lib.rs"), "pub fn old_symbol() {}\n").unwrap();
3975        let config = source_config(root.clone(), Language::Rust);
3976        let db = IndexDatabase::rebuild(&config).unwrap();
3977        assert_eq!(db.discovery_status(&config).unwrap().unindexed_source_files, 0);
3978
3979        fs::write(root.join("src/new.rs"), "pub fn new_symbol() {}\n").unwrap();
3980        fs::remove_file(root.join("src/lib.rs")).unwrap();
3981        let drift = db.discovery_status(&config).unwrap();
3982        assert_eq!(drift.unindexed_source_files, 1);
3983        assert_eq!(drift.removed_indexed_files, 1);
3984        assert!(drift.warning.as_deref().unwrap().contains("rag-rat index --discover"));
3985
3986        let db = IndexDatabase::index_discover(&config).unwrap();
3987        let fresh = db.discovery_status(&config).unwrap();
3988        assert_eq!(fresh.unindexed_source_files, 0);
3989        assert_eq!(fresh.removed_indexed_files, 0);
3990        assert!(fresh.warning.is_none());
3991        assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
3992        assert!(db.symbols("old_symbol", Some(Language::Rust), 10).unwrap().is_empty());
3993
3994        let mut events = Vec::new();
3995        let db = IndexDatabase::index_discover_with_progress(&config, |progress| {
3996            events.push(progress);
3997        })
3998        .unwrap();
3999        assert!(matches!(events.last(), Some(IndexProgress::Finished { files: 0 })));
4000        assert!(
4001            !events.iter().any(|event| matches!(
4002                event,
4003                IndexProgress::PreparingFile { .. } | IndexProgress::IndexingFile { .. }
4004            )),
4005            "no-op discover should not prepare or index files: {events:?}"
4006        );
4007        assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4008
4009        fs::remove_dir_all(root).unwrap();
4010    }
4011
4012    #[cfg(unix)]
4013    #[test]
4014    fn indexing_skips_symlink_loops() {
4015        let root = unique_temp_root();
4016        let _ = fs::remove_dir_all(&root);
4017        fs::create_dir_all(root.join("src")).unwrap();
4018        fs::write(root.join("src/lib.rs"), "pub fn loop_safe_symbol() {}\n").unwrap();
4019        std::os::unix::fs::symlink(&root, root.join("src/loop")).unwrap();
4020
4021        let config = source_config(root.clone(), Language::Rust);
4022        let db = IndexDatabase::rebuild(&config).unwrap();
4023
4024        assert_eq!(db.symbols("loop_safe_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4025
4026        fs::remove_dir_all(root).unwrap();
4027    }
4028
4029    #[test]
4030    fn dirty_git_files_are_indexed_as_worktree_overlay() {
4031        let root = unique_temp_root();
4032        let _ = fs::remove_dir_all(&root);
4033        let docs = root.join("docs");
4034        fs::create_dir_all(&docs).unwrap();
4035        fs::write(docs.join("search.md"), "# Title\nbase token\n").unwrap();
4036        run_git(&root, &["init"]);
4037        run_git(&root, &["add", "."]);
4038        run_git(
4039            &root,
4040            &[
4041                "-c",
4042                "user.name=Rag Rat Test",
4043                "-c",
4044                "user.email=rag-rat@example.invalid",
4045                "commit",
4046                "-m",
4047                "initial",
4048            ],
4049        );
4050
4051        let config = markdown_config_for_root(root.clone());
4052        let db = IndexDatabase::rebuild(&config).unwrap();
4053        assert_eq!(db.search("base", 10, false).unwrap().len(), 1);
4054
4055        fs::write(docs.join("search.md"), "# Title\noverlay token\n").unwrap();
4056        let db = IndexDatabase::index_changed(&config).unwrap();
4057        let scopes = db
4058            .storage
4059            .connection()
4060            .prepare(
4061                "
4062                SELECT commit_sha != '', worktree_id != ''
4063                FROM main.files
4064                WHERE path = 'docs/search.md'
4065                ORDER BY commit_sha != '' DESC, worktree_id != '' DESC
4066                ",
4067            )
4068            .unwrap()
4069            .query_map([], |row| Ok((row.get::<_, bool>(0)?, row.get::<_, bool>(1)?)))
4070            .unwrap()
4071            .collect::<Result<Vec<_>, _>>()
4072            .unwrap();
4073
4074        assert_eq!(scopes, vec![(true, false), (false, true)]);
4075        assert!(db.search("base", 10, false).unwrap().is_empty());
4076        let overlay_hits = db.search("overlay", 10, false).unwrap();
4077        assert_eq!(overlay_hits.len(), 1);
4078        assert!(overlay_hits[0].summary.contains("overlay token"));
4079
4080        fs::remove_dir_all(root).unwrap();
4081    }
4082
4083    #[test]
4084    fn rebuild_populates_revision_metadata_and_fresh_fts_state() {
4085        let (root, config) = markdown_config("alpha token");
4086        let db = IndexDatabase::rebuild(&config).unwrap();
4087        let status = db.status(&config.database).unwrap();
4088
4089        assert!(!status.content_revision.is_empty());
4090        assert_eq!(status.fts_source_revision.as_deref(), Some(status.content_revision.as_str()));
4091        assert_eq!(
4092            db.meta("content_revision").unwrap().as_deref(),
4093            Some(status.content_revision.as_str())
4094        );
4095        assert!(!status.fts_dirty);
4096        assert!(status.fts_fresh);
4097        assert!(!status.git_history.available);
4098        assert_eq!(status.git_history.commit_count, 0);
4099        assert_eq!(status.local_ai.embedding.state, "MissingModel");
4100        assert_eq!(status.local_ai.fastembed.backend, "fastembed");
4101        assert_eq!(status.local_ai.fastembed.model, ai::FASTEMBED_DISPLAY_MODEL);
4102        assert_eq!(status.local_ai.fastembed.dim, ai::FASTEMBED_EMBEDDING_DIM);
4103        assert!(!status.local_ai.fastembed.cache.is_empty());
4104        assert_eq!(status.local_ai.fastembed.build_feature_enabled, cfg!(feature = "fastembed"));
4105        assert_eq!(status.local_ai.artifacts.total_chunks, 1);
4106        assert_eq!(
4107            status.local_ai.artifacts.eligible_chunks + status.local_ai.artifacts.skipped_chunks,
4108            status.local_ai.artifacts.total_chunks
4109        );
4110        assert_eq!(
4111            status.local_ai.fastembed.eligible_embeddings
4112                + status.local_ai.fastembed.skipped_embeddings,
4113            status.local_ai.artifacts.total_chunks
4114        );
4115        assert_eq!(indexed_revision_count(&db), 1);
4116        assert_eq!(chunk_source_revision_count(&db), 1);
4117
4118        fs::remove_dir_all(root).unwrap();
4119    }
4120
4121    #[cfg(not(feature = "fastembed"))]
4122    #[test]
4123    fn fastembed_missing_feature_reports_rebuild_command() {
4124        let (root, config) = markdown_config("alpha token\n");
4125        let db = IndexDatabase::rebuild(&config).unwrap();
4126
4127        let err = db.install_model(ai::FASTEMBED_MODEL_ID).unwrap_err();
4128        assert!(err.to_string().contains(ai::FASTEMBED_MISSING_FEATURE_MESSAGE));
4129
4130        let status = db.local_ai_status().unwrap();
4131        assert!(!status.fastembed.build_feature_enabled);
4132        assert_eq!(status.fastembed.status, "MissingRuntime");
4133        assert_eq!(
4134            status.fastembed.message.as_deref(),
4135            Some(ai::FASTEMBED_MISSING_FEATURE_MESSAGE)
4136        );
4137        assert_eq!(status.fastembed.next.as_deref(), Some("cargo install rag-rat"));
4138
4139        fs::remove_dir_all(root).unwrap();
4140    }
4141
4142    #[test]
4143    fn reconcile_requires_explicit_model_install_and_ignores_stale_artifacts() {
4144        let (root, config) = markdown_config(
4145            "alpha token\nsecond line with enough detail for the semantic embedding policy to keep this chunk\nthird line with runtime context\n",
4146        );
4147        let db = IndexDatabase::rebuild(&config).unwrap();
4148        let chunk_id = first_chunk_id(&db);
4149
4150        let models = db.list_models().unwrap();
4151        let embedding = models.iter().find(|model| model.model_id == ai::HASH_MODEL_ID).unwrap();
4152        assert!(!embedding.installed);
4153        assert_eq!(embedding.status, "MissingModel");
4154
4155        let hits = db.search("alpha", 10, false).unwrap();
4156        assert_eq!(hits.len(), 1);
4157        assert!(hits[0].summary.contains("alpha token"));
4158
4159        let blocked = db.reconcile(Some(1), Some(8)).unwrap();
4160        assert_eq!(blocked.processed_chunks, 0);
4161        assert_eq!(blocked.embeddings_written, 0);
4162        assert_eq!(blocked.blocked_chunks, 0);
4163        assert_eq!(blocked.model_id, ai::HASH_MODEL_ID);
4164        assert_eq!(blocked.batch_size, 8);
4165        assert_eq!(blocked.status, "Blocked");
4166
4167        let status = db.local_ai_status().unwrap();
4168        assert_eq!(status.embedding.state, "MissingModel");
4169        assert_eq!(status.embedding.blocked_artifacts, 0);
4170
4171        db.install_model(ai::HASH_MODEL_ID).unwrap();
4172        let plan = db.reconcile_plan().unwrap();
4173        assert_eq!(plan.embeddings.missing, 1);
4174        assert_eq!(plan.embeddings.current, 0);
4175        let current = db.reconcile(Some(1), Some(8)).unwrap();
4176        assert_eq!(current.embeddings_written, 1);
4177        assert_eq!(current.model_id, ai::HASH_MODEL_ID);
4178        assert_eq!(current.model_version, "hash-v1");
4179        assert_eq!(current.embedding_dim, ai::HASH_EMBEDDING_DIM);
4180        assert_eq!(current.status, "Current");
4181        assert_eq!(current.work_reasons.get("Missing"), Some(&1));
4182        let noop = db.reconcile(None, Some(8)).unwrap();
4183        assert_eq!(noop.processed_chunks, 0);
4184        assert_eq!(noop.embeddings_written, 0);
4185        let status = db.local_ai_status().unwrap();
4186        assert_eq!(status.embedding.state, "Ready");
4187        assert_eq!(status.embedding.current_artifacts, 1);
4188        let embedding_bytes: i64 = db
4189            .storage
4190            .connection()
4191            .query_row(
4192                "SELECT length(vector_blob) FROM chunk_embeddings WHERE chunk_id = ?1 AND status = 'Current'",
4193                [chunk_id],
4194                |row| row.get(0),
4195            )
4196            .unwrap();
4197        assert_eq!(embedding_bytes, (ai::HASH_EMBEDDING_DIM * 4) as i64);
4198
4199        let hits = db.search("alpha", 10, false).unwrap();
4200        assert!(hits[0].summary.contains("alpha token"));
4201
4202        db.storage.connection().execute("DELETE FROM chunk_fts", []).unwrap();
4203        let vector_hits = db.search("alpha", 10, false).unwrap();
4204        assert_eq!(vector_hits.len(), 1);
4205        assert_eq!(vector_hits[0].chunk_id, chunk_id);
4206
4207        db.storage
4208            .connection()
4209            .execute(
4210                "UPDATE chunk_embeddings SET source_text_hash = 'old-hash' WHERE chunk_id = ?1",
4211                [chunk_id],
4212            )
4213            .unwrap();
4214        let plan = db.reconcile_plan().unwrap();
4215        assert_eq!(plan.embeddings.current, 0);
4216        assert_eq!(plan.embeddings.stale, 1);
4217        let refreshed = db.reconcile(None, Some(8)).unwrap();
4218        assert_eq!(refreshed.processed_chunks, 1);
4219        assert_eq!(refreshed.work_reasons.get("SourceChanged"), Some(&1));
4220        assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 1);
4221        let stale_embedding_hits = db.search("alpha", 10, false).unwrap();
4222        assert_eq!(stale_embedding_hits.len(), 1);
4223
4224        fs::remove_dir_all(root).unwrap();
4225    }
4226
4227    #[cfg(feature = "fastembed")]
4228    #[test]
4229    fn cached_fastembed_model_recovers_ready_state() {
4230        let (root, config) = markdown_config("alpha token\n");
4231        let db = IndexDatabase::rebuild(&config).unwrap();
4232        let cache_dir = root.join("models");
4233        let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4234        let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4235        fs::create_dir_all(repo.join("refs")).unwrap();
4236        fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4237        fs::write(repo.join("refs").join("main"), revision).unwrap();
4238
4239        ai::recover_cached_fastembed_model_at(db.storage.connection(), &cache_dir).unwrap();
4240
4241        let models = db.list_models().unwrap();
4242        let fastembed =
4243            models.iter().find(|model| model.model_id == ai::FASTEMBED_MODEL_ID).unwrap();
4244        assert!(fastembed.installed);
4245        assert_eq!(fastembed.status, "Ready");
4246        let status = db.local_ai_status().unwrap();
4247        assert_eq!(status.fastembed.status, "Ready");
4248        assert!(status.fastembed.active);
4249
4250        fs::remove_dir_all(root).unwrap();
4251    }
4252
4253    #[cfg(feature = "fastembed")]
4254    #[test]
4255    fn compatible_migrate_recovers_cached_fastembed_model() {
4256        let (root, config) = markdown_config("alpha token\n");
4257        let db = IndexDatabase::rebuild(&config).unwrap();
4258        let cache_dir = root.join("models");
4259        let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4260        let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4261        fs::create_dir_all(repo.join("refs")).unwrap();
4262        fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4263        fs::write(repo.join("refs").join("main"), revision).unwrap();
4264        db.storage
4265            .connection()
4266            .execute(
4267                "UPDATE ai_models
4268                 SET installed = 0, status = 'MissingModel', installed_at_ms = NULL
4269                 WHERE model_id = ?1",
4270                [ai::FASTEMBED_MODEL_ID],
4271            )
4272            .unwrap();
4273
4274        IndexDatabase::migrate_with_fastembed_cache(&config.database, Some(&cache_dir)).unwrap();
4275
4276        let db = IndexDatabase::open(&config.database).unwrap();
4277        let status = db.local_ai_status().unwrap();
4278        assert_eq!(status.fastembed.status, "Ready");
4279        assert!(status.fastembed.active);
4280
4281        fs::remove_dir_all(root).unwrap();
4282    }
4283
4284    #[test]
4285    fn reconcile_without_limit_processes_all_chunks() {
4286        let (root, config) = markdown_config(
4287            "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4288        );
4289        let db = IndexDatabase::rebuild(&config).unwrap();
4290        db.install_model(ai::HASH_MODEL_ID).unwrap();
4291
4292        let report = db.reconcile(None, Some(2)).unwrap();
4293
4294        assert_eq!(report.processed_chunks, 2);
4295        assert_eq!(report.embeddings_written, 2);
4296        assert_eq!(report.batch_size, 2);
4297        assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 2);
4298        let second = db.reconcile(None, Some(2)).unwrap();
4299        assert_eq!(second.processed_chunks, 0);
4300
4301        fs::remove_dir_all(root).unwrap();
4302    }
4303
4304    #[test]
4305    fn reconcile_treats_c_chunks_as_embedding_eligible() {
4306        let root = unique_temp_root();
4307        let _ = fs::remove_dir_all(&root);
4308        fs::create_dir_all(root.join("src")).unwrap();
4309        fs::write(
4310            root.join("src/main.c"),
4311            r#"
4312static int read_sensor_value(int baseline)
4313{
4314    int adjusted = baseline + 42;
4315    return adjusted;
4316}
4317
4318int main(void)
4319{
4320    int sample = read_sensor_value(7);
4321    return sample == 49 ? 0 : 1;
4322}
4323"#,
4324        )
4325        .unwrap();
4326        let config = source_config(root.clone(), Language::C);
4327        let db = IndexDatabase::rebuild(&config).unwrap();
4328        db.install_model(ai::HASH_MODEL_ID).unwrap();
4329
4330        let plan = db.reconcile_plan().unwrap();
4331
4332        assert_eq!(plan.embeddings.skipped_by_policy.get("SkipLanguageUnsupported"), None);
4333        assert!(plan.embeddings.missing > 0, "plan: {:?}", plan.embeddings);
4334
4335        let report = db.reconcile(None, Some(8)).unwrap();
4336        assert!(report.embeddings_written > 0, "report: {report:?}");
4337
4338        fs::remove_dir_all(root).unwrap();
4339    }
4340
4341    #[test]
4342    fn reconcile_policy_skips_tiny_chunks_before_embedding() {
4343        let (root, config) = markdown_config("tiny\n");
4344        let db = IndexDatabase::rebuild(&config).unwrap();
4345        db.install_model(ai::HASH_MODEL_ID).unwrap();
4346
4347        let plan = db.reconcile_plan().unwrap();
4348        assert_eq!(plan.embeddings.missing, 0);
4349        assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4350
4351        let report = db.reconcile(None, Some(8)).unwrap();
4352        assert_eq!(report.embeddings_written, 0);
4353        assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4354        assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 0);
4355
4356        fs::remove_dir_all(root).unwrap();
4357    }
4358
4359    #[test]
4360    fn reconcile_plan_reports_policy_skips_for_fastembed_model() {
4361        let (root, config) = markdown_config("tiny\n");
4362        let db = IndexDatabase::rebuild(&config).unwrap();
4363        db.storage
4364            .connection()
4365            .execute(
4366                "UPDATE ai_models
4367                 SET installed = 1, disabled = 0, status = 'Ready', embedding_dim = ?2
4368                 WHERE model_id = ?1",
4369                params![
4370                    ai::FASTEMBED_MODEL_ID,
4371                    i64::try_from(ai::FASTEMBED_EMBEDDING_DIM).unwrap()
4372                ],
4373            )
4374            .unwrap();
4375        db.storage
4376            .connection()
4377            .execute(
4378                "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4379                 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4380                [ai::FASTEMBED_MODEL_ID],
4381            )
4382            .unwrap();
4383
4384        let plan = db.reconcile_plan().unwrap();
4385
4386        assert_eq!(plan.embeddings.model_id, ai::FASTEMBED_MODEL_ID);
4387        assert_eq!(plan.embeddings.missing, 0);
4388        assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4389
4390        fs::remove_dir_all(root).unwrap();
4391    }
4392
4393    #[cfg(not(feature = "fastembed"))]
4394    #[test]
4395    fn blocked_fastembed_reconcile_still_reports_policy_skips() {
4396        let (root, config) = markdown_config("tiny\n");
4397        let db = IndexDatabase::rebuild(&config).unwrap();
4398        db.storage
4399            .connection()
4400            .execute(
4401                "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4402                 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4403                [ai::FASTEMBED_MODEL_ID],
4404            )
4405            .unwrap();
4406
4407        let report = db.reconcile(None, Some(8)).unwrap();
4408
4409        assert_eq!(report.status, "Blocked");
4410        assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4411
4412        fs::remove_dir_all(root).unwrap();
4413    }
4414
4415    #[test]
4416    fn search_explain_reports_weighted_score_components() {
4417        let (root, config) = markdown_config(
4418            "alpha runtime shutdown\nsecond line with enough detail for embedding eligibility and semantic vector scoring\nthird line\n",
4419        );
4420        let db = IndexDatabase::rebuild(&config).unwrap();
4421        db.install_model(ai::HASH_MODEL_ID).unwrap();
4422        db.reconcile(None, Some(8)).unwrap();
4423
4424        let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4425
4426        assert_eq!(hits.len(), 1);
4427        let components = hits[0].score_components.as_ref().unwrap();
4428        let component_sum = components.bm25
4429            + components.vector
4430            + components.symbol
4431            + components.graph
4432            + components.git
4433            + components.github;
4434        assert!((hits[0].score - component_sum).abs() < 0.000_001);
4435        assert!(components.bm25 > 0.0);
4436        assert!(components.vector > 0.0);
4437        assert!(components.vector_note.is_none());
4438        assert!(components.bm25 <= 0.45);
4439        assert!(components.vector <= 0.35);
4440        assert!(components.symbol <= 0.10);
4441        assert!(components.graph <= 0.05);
4442        assert!(components.git <= 0.03);
4443        assert!(components.github <= 0.02);
4444        assert!(db.search("runtime shutdown", 10, false).unwrap()[0].score_components.is_none());
4445
4446        fs::remove_dir_all(root).unwrap();
4447    }
4448
4449    #[test]
4450    fn search_explain_labels_missing_vector_runtime() {
4451        let (root, config) = markdown_config(
4452            "alpha runtime shutdown\nsecond line with enough detail for lexical search without embeddings\nthird line\n",
4453        );
4454        let db = IndexDatabase::rebuild(&config).unwrap();
4455
4456        let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4457
4458        assert_eq!(hits.len(), 1);
4459        let components = hits[0].score_components.as_ref().unwrap();
4460        assert!(components.bm25 > 0.0);
4461        assert_eq!(components.vector, 0.0);
4462        assert_eq!(
4463            components.vector_note.as_deref(),
4464            Some("vector search unavailable: no current embedding model")
4465        );
4466
4467        fs::remove_dir_all(root).unwrap();
4468    }
4469
4470    #[test]
4471    fn git_history_indexes_commits_paths_queries_and_blame() {
4472        let root = unique_temp_root();
4473        let _ = fs::remove_dir_all(&root);
4474        fs::create_dir_all(root.join("docs")).unwrap();
4475        fs::create_dir_all(root.join("src")).unwrap();
4476        run_git(&root, &["init"]);
4477        run_git(&root, &["config", "user.name", "Rag Rat"]);
4478        run_git(&root, &["config", "user.email", "rag@example.com"]);
4479
4480        fs::write(root.join("docs/search.md"), "# Title\nalpha token\n").unwrap();
4481        fs::write(root.join("src/lib.rs"), "pub fn tracked_symbol() {}\n").unwrap();
4482        run_git(&root, &["add", "."]);
4483        run_git(&root, &["commit", "-m", "Add alpha docs"]);
4484
4485        fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
4486        run_git(&root, &["add", "."]);
4487        run_git(&root, &["commit", "-m", "Refresh beta docs"]);
4488
4489        let config = Config {
4490            root: root.clone(),
4491            database: root.join(".rag-rat/index.sqlite"),
4492            targets: vec![
4493                ResolvedTarget {
4494                    name: "markdown".to_string(),
4495                    language: Language::Markdown,
4496                    directories: vec![PathBuf::from("docs")],
4497                    include: vec!["**/*.md".to_string()],
4498                    exclude: Vec::new(),
4499                    kind: TargetKind::Docs,
4500                },
4501                ResolvedTarget {
4502                    name: "rust".to_string(),
4503                    language: Language::Rust,
4504                    directories: vec![PathBuf::from("src")],
4505                    include: vec!["**/*.rs".to_string()],
4506                    exclude: Vec::new(),
4507                    kind: TargetKind::Source,
4508                },
4509            ],
4510            local_ai: Default::default(),
4511        };
4512        let db = IndexDatabase::rebuild(&config).unwrap();
4513        let status = db.status(&config.database).unwrap();
4514        assert!(status.git_history.available);
4515        assert!(status.git_history.head.is_some());
4516        assert_eq!(status.git_history.indexed_head, status.git_history.head);
4517        assert_eq!(status.git_history.commit_count, 2);
4518        assert_eq!(status.git_history.file_change_count, 3);
4519
4520        let commit_hits = db.commit_search("beta", 10).unwrap();
4521        assert_eq!(commit_hits.len(), 1);
4522        assert_eq!(commit_hits[0].subject, "Refresh beta docs");
4523        assert_eq!(commit_hits[0].evidence_kind, "historical");
4524        assert!(commit_hits[0].score > 0.0);
4525
4526        let path_history = db.git_history_for_path("docs/search.md", 10).unwrap();
4527        assert_eq!(path_history.len(), 2);
4528        assert!(path_history.iter().all(|item| item.evidence_kind == "historical"));
4529
4530        let symbol_history =
4531            db.git_history_for_symbol("tracked_symbol", Some(Language::Rust), 10).unwrap();
4532        assert_eq!(symbol_history.len(), 1);
4533        assert_eq!(symbol_history[0].path, "src/lib.rs");
4534        assert_eq!(symbol_history[0].evidence_kind, "historical");
4535        let impact = db.impact_surface("tracked_symbol", 10).unwrap();
4536        assert!(impact.iter().any(|item| {
4537            item.category == "Direct structural impact" && item.reason == "exact_symbol_definition"
4538        }));
4539        assert!(impact.iter().any(|item| {
4540            item.category == "Historical/papertrail evidence"
4541                && item.reason == "git_commit_touched_file"
4542        }));
4543
4544        let query_commits = db.commits_touching_query("beta", 10).unwrap();
4545        let beta_commit =
4546            query_commits.iter().find(|hit| hit.subject == "Refresh beta docs").unwrap();
4547        assert!(beta_commit.evidence.iter().any(|value| value == "commit_message"));
4548        assert!(beta_commit.evidence.iter().any(|value| value == "file_change"));
4549        assert_eq!(beta_commit.evidence_kind, "historical");
4550
4551        let chunk_id = first_chunk_id(&db);
4552        let blame = db.git_blame_chunk(chunk_id).unwrap().unwrap();
4553        assert_eq!(blame.source_text_hash, hex_sha256("# Title\nbeta token\n".as_bytes()));
4554        assert_eq!(blame.line_count, 2);
4555        assert_eq!(blame.commit_counts.values().sum::<i64>(), 2);
4556        assert!(blame.dominant_commit_lines >= 1);
4557        assert!(blame.dominant_commit.is_some());
4558        assert_eq!(blame.evidence_kind, "historical");
4559        let cached = db.git_blame_chunk(chunk_id).unwrap().unwrap();
4560        assert_eq!(cached.source_text_hash, blame.source_text_hash);
4561
4562        fs::remove_dir_all(root).unwrap();
4563    }
4564
4565    #[test]
4566    fn indexes_rust_graph_edges_from_tree_sitter() {
4567        let root = unique_temp_root();
4568        let _ = fs::remove_dir_all(&root);
4569        fs::create_dir_all(root.join("src")).unwrap();
4570        fs::write(
4571            root.join("src/lib.rs"),
4572            r#"
4573use crate::worker::Worker;
4574mod worker;
4575
4576trait Service {
4577    fn serve(&self);
4578}
4579
4580struct Worker;
4581
4582impl Service for Worker {
4583    fn serve(&self) {
4584        helper();
4585    }
4586}
4587
4588fn helper() {}
4589
4590fn caller() {
4591    helper();
4592    Worker.serve();
4593}
4594"#,
4595        )
4596        .unwrap();
4597        let config = source_config(root.clone(), Language::Rust);
4598        let db = IndexDatabase::rebuild(&config).unwrap();
4599
4600        assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
4601        assert_edge(&db, "Worker", "Service", "implements", "Syntactic");
4602        assert_edge(&db, "src/lib.rs", "worker", "imports", "Syntactic");
4603        let callers = db.find_callers("helper", 10).unwrap();
4604        assert!(
4605            callers.iter().any(|edge| {
4606                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
4607                    && edge.edge_kind == "calls_name"
4608            }),
4609            "helper callers: {callers:?}"
4610        );
4611
4612        fs::remove_dir_all(root).unwrap();
4613    }
4614
4615    #[test]
4616    fn ffi_surface_labels_exported_impl_members_separately() {
4617        let root = unique_temp_root();
4618        let _ = fs::remove_dir_all(&root);
4619        fs::create_dir_all(root.join("src")).unwrap();
4620        fs::write(
4621            root.join("src/lib.rs"),
4622            r#"
4623pub struct PhraseRepo;
4624
4625#[uniffi::export]
4626impl PhraseRepo {
4627    pub fn children(&self) {}
4628    pub fn journal(&self) {}
4629}
4630
4631#[cfg_attr(not(target_arch = "wasm32"), uniffi::export(async_runtime = "tokio"))]
4632impl Runtime {
4633    pub fn route_search_query(&self) {}
4634}
4635
4636pub struct Runtime;
4637
4638/// Not #[uniffi::export]: this is an internal helper.
4639pub fn internal_helper() {}
4640
4641#[cfg_attr(target_arch = "wasm32", ::uniffi::export)]
4642pub fn exported_fn() {}
4643"#,
4644        )
4645        .unwrap();
4646        let config = source_config(root.clone(), Language::Rust);
4647        let db = IndexDatabase::rebuild(&config).unwrap();
4648
4649        let surface = db.ffi_surface(20).unwrap();
4650        assert!(
4651            surface.iter().any(|item| {
4652                item.reason == "rust_uniffi_export"
4653                    && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("exported_fn"))
4654            }),
4655            "direct export should remain direct: {surface:?}"
4656        );
4657        assert!(
4658            surface.iter().any(|item| item.reason == "rust_uniffi_exported_impl"),
4659            "exported impl/type surface should be explicit: {surface:?}"
4660        );
4661        assert!(
4662            surface.iter().any(|item| {
4663                item.reason == "rust_uniffi_impl_member"
4664                    && item
4665                        .symbol
4666                        .as_deref()
4667                        .is_some_and(|symbol| symbol.ends_with("route_search_query"))
4668            }),
4669            "cfg_attr exported impl member should be labeled separately: {surface:?}"
4670        );
4671        assert!(
4672            surface.iter().any(|item| {
4673                item.reason == "rust_uniffi_impl_member"
4674                    && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("children"))
4675            }),
4676            "impl member should be labeled separately: {surface:?}"
4677        );
4678        assert!(
4679            !surface.iter().any(|item| {
4680                item.reason == "rust_uniffi_export"
4681                    && item.symbol.as_deref().is_some_and(|symbol| {
4682                        symbol.ends_with("children") || symbol.ends_with("journal")
4683                    })
4684            }),
4685            "impl members must not be reported as direct exports: {surface:?}"
4686        );
4687        assert!(
4688            !surface.iter().any(|item| {
4689                item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("internal_helper"))
4690            }),
4691            "comment-only UniFFI mentions must not create FFI surface rows: {surface:?}"
4692        );
4693
4694        fs::remove_dir_all(root).unwrap();
4695    }
4696
4697    #[test]
4698    fn search_and_read_chunk_attach_bounded_graph_evidence() {
4699        let root = unique_temp_root();
4700        let _ = fs::remove_dir_all(&root);
4701        fs::create_dir_all(root.join("src")).unwrap();
4702        fs::write(
4703            root.join("src/lib.rs"),
4704            "pub fn helper() {}\n\npub fn caller() {\n    helper();\n}\n",
4705        )
4706        .unwrap();
4707        let config = source_config(root.clone(), Language::Rust);
4708        let db = IndexDatabase::rebuild(&config).unwrap();
4709
4710        let hits = db.search("helper caller", 10, false).unwrap();
4711        let helper_hit = hits
4712            .iter()
4713            .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with("helper")))
4714            .expect("helper search hit");
4715        let helper_graph = helper_hit.graph.as_ref().expect("helper graph evidence");
4716        assert_eq!(helper_graph.caller_count, 1);
4717        assert!(helper_graph.top_callers.iter().any(|caller| {
4718            caller.symbol_path.ends_with("caller")
4719                && caller.callsite.line == 4
4720                && caller.callsite.span == [4, 4]
4721                && caller.confidence == "syntactic"
4722        }));
4723        assert!(helper_graph.callers.is_empty(), "search keeps graph compact");
4724
4725        let caller_hit = hits
4726            .iter()
4727            .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with("caller")))
4728            .expect("caller search hit");
4729        let caller_graph = caller_hit.graph.as_ref().expect("caller graph evidence");
4730        assert!(caller_graph.top_callees.iter().any(|callee| {
4731            callee.target == "helper"
4732                && callee.callsite.line == 4
4733                && callee.callsite.span == [4, 4]
4734                && callee.confidence == "syntactic"
4735        }));
4736
4737        let chunk = db.read_chunk(caller_hit.chunk_id).unwrap().expect("caller chunk");
4738        let full_graph = chunk.graph.as_ref().expect("full read_chunk graph");
4739        assert!(full_graph.symbol.as_ref().is_some_and(|symbol| symbol.name == "caller"));
4740        assert!(
4741            full_graph
4742                .callees
4743                .iter()
4744                .any(|callee| callee.target == "helper" && callee.callsite.line == 4)
4745        );
4746        assert!(full_graph.notes.iter().any(|note| note.contains("tree-sitter/syntactic")));
4747
4748        fs::remove_dir_all(root).unwrap();
4749    }
4750
4751    #[test]
4752    fn graph_exact_mode_requires_verified_symbol_identity() {
4753        let root = unique_temp_root();
4754        let _ = fs::remove_dir_all(&root);
4755        fs::create_dir_all(root.join("src")).unwrap();
4756        fs::write(
4757            root.join("src/lib.rs"),
4758            "pub fn helper() {}\n\npub fn caller() {\n    helper();\n}\n",
4759        )
4760        .unwrap();
4761        let config = source_config(root.clone(), Language::Rust);
4762        let db = IndexDatabase::rebuild(&config).unwrap();
4763        let helper = db.symbols("helper", Some(Language::Rust), 10).unwrap().remove(0);
4764        let caller = db.symbols("caller", Some(Language::Rust), 10).unwrap().remove(0);
4765
4766        let bare_exact = db
4767            .find_callers_with_options(
4768                "helper",
4769                10,
4770                &crate::query::graph::GraphTraversalOptions {
4771                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4772                    ..Default::default()
4773                },
4774            )
4775            .unwrap();
4776        assert!(bare_exact.is_empty(), "bare exact lookup should not fall back: {bare_exact:?}");
4777
4778        let exact_callers = db
4779            .find_callers_with_options(
4780                "helper",
4781                10,
4782                &crate::query::graph::GraphTraversalOptions {
4783                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4784                    symbol_id: Some(helper.symbol_id),
4785                    ..Default::default()
4786                },
4787            )
4788            .unwrap();
4789        assert!(
4790            exact_callers.iter().any(|edge| {
4791                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
4792                    && edge.verified_target_symbol
4793            }),
4794            "exact callers: {exact_callers:?}"
4795        );
4796        assert!(exact_callers.iter().all(|edge| edge.verified_target_symbol));
4797
4798        let exact_callees = db
4799            .trace_callees_with_options(
4800                "caller",
4801                10,
4802                &crate::query::graph::GraphTraversalOptions {
4803                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4804                    symbol_id: Some(caller.symbol_id),
4805                    ..Default::default()
4806                },
4807            )
4808            .unwrap();
4809        assert!(
4810            exact_callees.iter().any(|edge| {
4811                edge.target.as_deref() == Some("helper") && edge.verified_target_symbol
4812            }),
4813            "exact callees: {exact_callees:?}"
4814        );
4815        assert!(exact_callees.iter().all(|edge| edge.verified_target_symbol));
4816
4817        fs::remove_dir_all(root).unwrap();
4818    }
4819
4820    #[test]
4821    fn symbol_lookup_ranks_type_definitions_before_impl_blocks() {
4822        let root = unique_temp_root();
4823        let _ = fs::remove_dir_all(&root);
4824        fs::create_dir_all(root.join("src")).unwrap();
4825        fs::write(
4826            root.join("src/lib.rs"),
4827            r#"
4828impl Database {
4829    pub fn open() -> Self {
4830        Database
4831    }
4832}
4833
4834pub struct Database;
4835"#,
4836        )
4837        .unwrap();
4838        let config = source_config(root.clone(), Language::Rust);
4839        let db = IndexDatabase::rebuild(&config).unwrap();
4840        let hits = db.symbols("Database", Some(Language::Rust), 10).unwrap();
4841        assert!(hits.len() >= 2, "fixture should expose both impl and struct symbols: {hits:?}");
4842        assert_eq!(hits[0].kind, "struct", "Database lookup should prefer type definition");
4843        assert!(
4844            hits.iter().any(|hit| hit.kind == "impl"),
4845            "impl Database should still be available after the struct: {hits:?}"
4846        );
4847
4848        fs::remove_dir_all(root).unwrap();
4849    }
4850
4851    #[test]
4852    fn logical_symbol_exact_mode_covers_duplicate_rust_variants() {
4853        let root = unique_temp_root();
4854        let _ = fs::remove_dir_all(&root);
4855        fs::create_dir_all(root.join("src")).unwrap();
4856        fs::write(
4857            root.join("src/lib.rs"),
4858            r#"
4859#[cfg(not(target_arch = "wasm32"))]
4860pub fn spawn_blocking() {}
4861
4862#[cfg(target_arch = "wasm32")]
4863pub fn spawn_blocking() {}
4864
4865pub fn caller() {
4866    spawn_blocking();
4867}
4868"#,
4869        )
4870        .unwrap();
4871        let config = source_config(root.clone(), Language::Rust);
4872        let db = IndexDatabase::rebuild(&config).unwrap();
4873        let lookup = db
4874            .symbol_candidates(&crate::query::symbol::SymbolSelector {
4875                logical_symbol_id: None,
4876                symbol_id: None,
4877                symbol_path: None,
4878                symbol: Some("spawn_blocking".to_string()),
4879                language: Some(Language::Rust),
4880                allow_ambiguous: true,
4881                limit: 10,
4882            })
4883            .unwrap();
4884        let logical_symbol_id = lookup.candidates[0].logical_symbol_id.expect("logical id");
4885        assert_eq!(lookup.candidates[0].logical_variant_count, Some(2));
4886        assert_eq!(lookup.candidates[0].logical_group_reason.as_deref(), Some("cfg_variant"));
4887
4888        let exact_variant_callers = db
4889            .find_callers_with_options(
4890                "spawn_blocking",
4891                10,
4892                &crate::query::graph::GraphTraversalOptions {
4893                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4894                    symbol_id: Some(lookup.candidates[1].symbol_id),
4895                    ..Default::default()
4896                },
4897            )
4898            .unwrap();
4899        assert!(
4900            exact_variant_callers.iter().any(|edge| {
4901                edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
4902                    && edge.target.as_deref() == Some("spawn_blocking")
4903                    && edge.verified_target_symbol
4904            }),
4905            "symbol_id exact should include its logical cfg group: {exact_variant_callers:?}"
4906        );
4907        assert!(exact_variant_callers.iter().all(|edge| edge.verified_target_symbol));
4908
4909        let exact_logical = db
4910            .graph_traversal_report(
4911                "find_callers",
4912                &lookup.candidates[0],
4913                true,
4914                10,
4915                &crate::query::graph::GraphTraversalOptions {
4916                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4917                    symbol_id: Some(lookup.candidates[0].symbol_id),
4918                    ..Default::default()
4919                },
4920            )
4921            .unwrap();
4922        assert_eq!(exact_logical.query.logical_symbol_id, Some(logical_symbol_id));
4923        assert_eq!(
4924            exact_logical.logical_symbol.as_ref().map(|symbol| symbol.variant_count),
4925            Some(2)
4926        );
4927        assert_eq!(exact_logical.variants.len(), 2);
4928        assert!(exact_logical.results.iter().all(|edge| edge.verified_target_symbol));
4929        assert!(
4930            exact_logical.results.iter().any(|edge| {
4931                edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
4932                    && edge.target.as_deref() == Some("spawn_blocking")
4933            }),
4934            "logical exact callers: {exact_logical:?}"
4935        );
4936
4937        fs::remove_dir_all(root).unwrap();
4938    }
4939
4940    #[test]
4941    fn indexes_real_world_rust_graph_patterns() {
4942        let root = fixture_temp_root("graph-realworld/rust");
4943        let config = source_config(root.clone(), Language::Rust);
4944        let db = IndexDatabase::rebuild(&config).unwrap();
4945
4946        assert_edge(&db, "src/lib.rs", "worker", "imports", "Syntactic");
4947        assert_edge(&db, "src/lib.rs", "Worker", "exports", "Syntactic");
4948        assert_edge(&db, "entry", "new", "calls_name", "NameOnly");
4949        assert_edge(&db, "entry", "Client", "references_type", "Syntactic");
4950        assert_edge(&db, "drive", "serve", "calls_name", "NameOnly");
4951        assert_edge(&db, "drive", "GenericRunner", "references_type", "Syntactic");
4952        assert_edge(&db, "Worker", "Service", "implements", "Syntactic");
4953        assert_edge(&db, "generic_call", "T", "references_type", "NameOnly");
4954        assert_edge(&db, "entry", "generated_call", "uses_macro", "NameOnly");
4955        let syntactic_callers = db.find_callers("serve", 10).unwrap();
4956        assert!(
4957            syntactic_callers.is_empty(),
4958            "syntactic serve callers should avoid receiver/name fallback: {syntactic_callers:?}"
4959        );
4960        let callers = db
4961            .find_callers_with_options(
4962                "serve",
4963                10,
4964                &crate::query::graph::GraphTraversalOptions {
4965                    resolution_mode: crate::query::graph::GraphResolutionMode::Fuzzy,
4966                    ..Default::default()
4967                },
4968            )
4969            .unwrap();
4970        assert!(
4971            callers.iter().any(|edge| {
4972                edge.edge_kind == "calls_name"
4973                    && edge.edge_confidence == edge.confidence
4974                    && edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("drive"))
4975            }),
4976            "serve callers: {callers:?}"
4977        );
4978
4979        fs::remove_dir_all(root).unwrap();
4980    }
4981
4982    #[test]
4983    fn indexes_typescript_graph_edges_from_tree_sitter() {
4984        let root = unique_temp_root();
4985        let _ = fs::remove_dir_all(&root);
4986        fs::create_dir_all(root.join("src")).unwrap();
4987        fs::write(
4988            root.join("src/helper.ts"),
4989            "export function helper() {}\nexport const Card = () => null;\n",
4990        )
4991        .unwrap();
4992        fs::write(
4993            root.join("src/App.tsx"),
4994            r#"
4995import { helper, Card } from "./helper";
4996
4997export function run() {
4998  helper();
4999  return <Card />;
5000}
5001
5002export const callRun = () => run();
5003"#,
5004        )
5005        .unwrap();
5006        let config = source_config(root.clone(), Language::TypeScript);
5007        let db = IndexDatabase::rebuild(&config).unwrap();
5008
5009        assert_edge(&db, "run", "helper", "calls_name", "Syntactic");
5010        assert_edge(&db, "run", "Card", "references_type", "Syntactic");
5011        assert_edge(&db, "src/App.tsx", "helper", "imports", "Syntactic");
5012        assert_edge(&db, "src/App.tsx", "run", "exports", "Syntactic");
5013        let callees = db.trace_callees("callRun", 10).unwrap();
5014        assert!(
5015            callees.iter().any(|edge| {
5016                edge.to_symbol.as_deref().is_some_and(|name| name.ends_with("run"))
5017                    && edge.confidence == "Syntactic"
5018            }),
5019            "callRun callees: {callees:?}"
5020        );
5021
5022        fs::remove_dir_all(root).unwrap();
5023    }
5024
5025    #[test]
5026    fn indexes_c_graph_edges_from_tree_sitter() {
5027        let root = unique_temp_root();
5028        let _ = fs::remove_dir_all(&root);
5029        fs::create_dir_all(root.join("src")).unwrap();
5030        fs::write(
5031            root.join("src/runtime.c"),
5032            r#"
5033typedef struct Runtime Runtime;
5034
5035struct Runtime {
5036  int state;
5037};
5038
5039int helper(Runtime *runtime) {
5040  return runtime->state;
5041}
5042
5043int runtime_open(Runtime *runtime) {
5044  return helper(runtime);
5045}
5046"#,
5047        )
5048        .unwrap();
5049        let config = source_config(root.clone(), Language::C);
5050        let db = IndexDatabase::rebuild(&config).unwrap();
5051
5052        assert_edge(&db, "runtime_open", "helper", "calls_name", "Syntactic");
5053
5054        fs::remove_dir_all(root).unwrap();
5055    }
5056
5057    #[test]
5058    fn indexes_c_file_scope_macro_regions_for_search() {
5059        let root = unique_temp_root();
5060        let _ = fs::remove_dir_all(&root);
5061        fs::create_dir_all(root.join("drivers/entropy")).unwrap();
5062        fs::write(
5063            root.join("drivers/entropy/entropy.c"),
5064            r#"
5065static int entropy_init(const struct device *dev)
5066{
5067    ARG_UNUSED(dev);
5068    return 0;
5069}
5070
5071/* Entropy driver APIs structure */
5072static DEVICE_API(entropy, entropy_cryptoacc_trng_api) = {
5073    .get_entropy = entropy_cryptoacc_trng_get_entropy,
5074};
5075
5076DEVICE_DT_INST_DEFINE(0, entropy_init, NULL, NULL, NULL,
5077                      PRE_KERNEL_1, CONFIG_ENTROPY_INIT_PRIORITY,
5078                      &entropy_cryptoacc_trng_api);
5079"#,
5080        )
5081        .unwrap();
5082        let config = Config {
5083            root: root.clone(),
5084            database: root.join(".rag-rat/index.sqlite"),
5085            targets: vec![ResolvedTarget {
5086                name: "c".to_string(),
5087                language: Language::C,
5088                directories: vec![PathBuf::from("drivers/entropy")],
5089                include: vec!["**/*.c".to_string()],
5090                exclude: Vec::new(),
5091                kind: TargetKind::Source,
5092            }],
5093            local_ai: Default::default(),
5094        };
5095        let db = IndexDatabase::rebuild(&config).unwrap();
5096
5097        let hits = db.search("DEVICE_API", 5, false).unwrap();
5098        assert!(
5099            hits.iter().any(|hit| {
5100                hit.path == "drivers/entropy/entropy.c" && hit.summary.contains("DEVICE_API")
5101            }),
5102            "DEVICE_API hits: {hits:?}"
5103        );
5104
5105        fs::remove_dir_all(root).unwrap();
5106    }
5107
5108    #[test]
5109    fn indexes_cpp_graph_edges_from_tree_sitter() {
5110        let root = unique_temp_root();
5111        let _ = fs::remove_dir_all(&root);
5112        fs::create_dir_all(root.join("src")).unwrap();
5113        fs::write(
5114            root.join("src/runtime.cpp"),
5115            r#"
5116namespace held {
5117class Runtime {
5118public:
5119  void open();
5120};
5121
5122void helper() {}
5123
5124void Runtime::open() {
5125  helper();
5126}
5127}
5128"#,
5129        )
5130        .unwrap();
5131        let config = source_config(root.clone(), Language::Cpp);
5132        let db = IndexDatabase::rebuild(&config).unwrap();
5133
5134        assert_edge(&db, "open", "helper", "calls_name", "Syntactic");
5135
5136        fs::remove_dir_all(root).unwrap();
5137    }
5138
5139    #[test]
5140    fn indexes_real_world_typescript_graph_patterns() {
5141        let root = fixture_temp_root("graph-realworld/typescript");
5142        let config = source_config(root.clone(), Language::TypeScript);
5143        let db = IndexDatabase::rebuild(&config).unwrap();
5144
5145        assert_edge(&db, "src/lib.tsx", "DefaultWidget", "imports", "Syntactic");
5146        assert_edge(&db, "src/lib.tsx", "WidgetNS", "imports", "NameOnly");
5147        assert_edge(&db, "src/lib.tsx", "WidgetProps", "imports", "Syntactic");
5148        assert_edge(&db, "src/lib.tsx", "ReExportedWidget", "exports", "NameOnly");
5149        assert_edge(&db, "useWidget", "useMemo", "calls_name", "NameOnly");
5150        assert_edge(&db, "useWidget", "DefaultWidget", "calls_name", "Syntactic");
5151        assert_edge(&db, "Shell", "renderWidget", "calls_name", "NameOnly");
5152        assert_edge(&db, "Shell", "WidgetNS", "references_type", "NameOnly");
5153        assert_edge(&db, "Shell", "DefaultWidget", "references_type", "Syntactic");
5154        assert_edge(&db, "DefaultWidget", "WidgetProps", "references_type", "Syntactic");
5155        let callees = db
5156            .trace_callees_with_options(
5157                "Shell",
5158                10,
5159                &crate::query::graph::GraphTraversalOptions {
5160                    include_references: true,
5161                    edge_kinds: None,
5162                    ..Default::default()
5163                },
5164            )
5165            .unwrap();
5166        assert!(
5167            callees.iter().any(|edge| {
5168                edge.edge_kind == "references_type"
5169                    && edge.edge_confidence == edge.confidence
5170                    && edge.to_symbol.as_deref().is_some_and(|name| name.ends_with("DefaultWidget"))
5171            }),
5172            "Shell callees: {callees:?}"
5173        );
5174
5175        fs::remove_dir_all(root).unwrap();
5176    }
5177
5178    #[test]
5179    fn rust_macro_edges_do_not_resolve_to_same_named_modules() {
5180        let root = unique_temp_root();
5181        let _ = fs::remove_dir_all(&root);
5182        fs::create_dir_all(root.join("src")).unwrap();
5183        fs::write(
5184            root.join("src/lib.rs"),
5185            r#"
5186mod format;
5187
5188fn execute_one() {
5189    let _value = format!("hello");
5190}
5191"#,
5192        )
5193        .unwrap();
5194        fs::write(root.join("src/format.rs"), "pub fn helper() {}\n").unwrap();
5195        let config = source_config(root.clone(), Language::Rust);
5196        let db = IndexDatabase::rebuild(&config).unwrap();
5197
5198        let edge = db
5199            .storage
5200            .connection()
5201            .query_row(
5202                "
5203                SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
5204                FROM edges
5205                WHERE edge_kind = 'uses_macro'
5206                  AND to_name = 'format'
5207                ",
5208                [],
5209                |row| {
5210                    Ok((
5211                        row.get::<_, String>(0)?,
5212                        row.get::<_, String>(1)?,
5213                        row.get::<_, Option<i64>>(2)?,
5214                        row.get::<_, String>(3)?,
5215                        row.get::<_, String>(4)?,
5216                        row.get::<_, Option<String>>(5)?,
5217                    ))
5218                },
5219            )
5220            .unwrap();
5221        assert_eq!(edge.0, "uses_macro");
5222        assert_eq!(edge.1, "format");
5223        assert_eq!(edge.2, None);
5224        assert_eq!(edge.3, "NameOnly");
5225        assert_eq!(edge.4, "unresolved");
5226        assert!(edge.5.as_deref().is_some_and(|value| value.contains("format!")));
5227
5228        fs::remove_dir_all(root).unwrap();
5229    }
5230
5231    #[test]
5232    fn opening_old_graph_policy_rebuilds_stale_macro_edges() {
5233        let root = unique_temp_root();
5234        let _ = fs::remove_dir_all(&root);
5235        fs::create_dir_all(root.join("src")).unwrap();
5236        fs::write(
5237            root.join("src/lib.rs"),
5238            r#"
5239mod format;
5240
5241fn execute_one() {
5242    let _value = format!("hello");
5243}
5244"#,
5245        )
5246        .unwrap();
5247        fs::write(root.join("src/format.rs"), "pub fn helper() {}\n").unwrap();
5248        let config = source_config(root.clone(), Language::Rust);
5249        let db = IndexDatabase::rebuild(&config).unwrap();
5250        db.storage
5251            .connection()
5252            .execute("UPDATE index_meta SET value = 'old' WHERE key = 'graph_index_version'", [])
5253            .unwrap();
5254        db.storage
5255            .connection()
5256            .execute(
5257                "
5258                UPDATE edges
5259                SET edge_kind = 'calls_name',
5260                    to_symbol_id = (SELECT id FROM symbols WHERE name = 'format' LIMIT 1),
5261                    confidence = 'Syntactic',
5262                    evidence = NULL,
5263                    resolution = 'syntactic'
5264                WHERE to_name = 'format'
5265                ",
5266                [],
5267            )
5268            .unwrap();
5269        drop(db);
5270
5271        let reopened = IndexDatabase::open(&config.database).unwrap();
5272        let edge = reopened
5273            .storage
5274            .connection()
5275            .query_row(
5276                "
5277                SELECT edge_kind, to_symbol_id, confidence, resolution, evidence
5278                FROM edges
5279                WHERE to_name = 'format'
5280                  AND edge_kind = 'uses_macro'
5281                ",
5282                [],
5283                |row| {
5284                    Ok((
5285                        row.get::<_, String>(0)?,
5286                        row.get::<_, Option<i64>>(1)?,
5287                        row.get::<_, String>(2)?,
5288                        row.get::<_, String>(3)?,
5289                        row.get::<_, Option<String>>(4)?,
5290                    ))
5291                },
5292            )
5293            .unwrap();
5294        assert_eq!(edge.0, "uses_macro");
5295        assert_eq!(edge.1, None);
5296        assert_eq!(edge.2, "NameOnly");
5297        assert_eq!(edge.3, "unresolved");
5298        assert!(edge.4.as_deref().is_some_and(|value| value.contains("format!")));
5299
5300        fs::remove_dir_all(root).unwrap();
5301    }
5302
5303    #[test]
5304    fn qualified_common_member_calls_do_not_resolve_by_short_name() {
5305        let root = unique_temp_root();
5306        let _ = fs::remove_dir_all(&root);
5307        fs::create_dir_all(root.join("src")).unwrap();
5308        fs::write(
5309            root.join("src/lib.rs"),
5310            r#"
5311pub struct AlertsStore;
5312
5313impl AlertsStore {
5314    pub fn new() -> Self {
5315        Self
5316    }
5317}
5318
5319pub fn caller() {
5320    let _items: Vec<String> = Vec::new();
5321}
5322"#,
5323        )
5324        .unwrap();
5325        let config = source_config(root.clone(), Language::Rust);
5326        let db = IndexDatabase::rebuild(&config).unwrap();
5327
5328        let edge = db
5329            .storage
5330            .connection()
5331            .query_row(
5332                "
5333                SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution
5334                FROM edges
5335                WHERE from_name LIKE '%caller'
5336                  AND edge_kind = 'calls_name'
5337                  AND to_name = 'new'
5338                ",
5339                [],
5340                |row| {
5341                    Ok((
5342                        row.get::<_, String>(0)?,
5343                        row.get::<_, Option<String>>(1)?,
5344                        row.get::<_, Option<i64>>(2)?,
5345                        row.get::<_, String>(3)?,
5346                        row.get::<_, String>(4)?,
5347                    ))
5348                },
5349            )
5350            .unwrap();
5351        assert_eq!(edge.0, "new");
5352        assert_eq!(edge.1.as_deref(), Some("Vec::new"));
5353        assert_eq!(edge.2, None);
5354        assert_eq!(edge.3, "NameOnly");
5355        assert_eq!(edge.4, "unresolved");
5356
5357        fs::remove_dir_all(root).unwrap();
5358    }
5359
5360    #[test]
5361    fn macro_edges_do_not_resolve_to_same_named_typescript_symbols() {
5362        let root = unique_temp_root();
5363        let _ = fs::remove_dir_all(&root);
5364        fs::create_dir_all(root.join("src")).unwrap();
5365        fs::write(
5366            root.join("src/lib.rs"),
5367            r#"
5368fn rust_entry() {
5369    let _payload = json!({"ok": true});
5370}
5371"#,
5372        )
5373        .unwrap();
5374        fs::write(root.join("src/preferences.ts"), "export function json() { return {}; }\n")
5375            .unwrap();
5376        let mut config = source_config(root.clone(), Language::Rust);
5377        config.targets.push(ResolvedTarget {
5378            name: "typescript".to_string(),
5379            language: Language::TypeScript,
5380            directories: vec![PathBuf::from("src")],
5381            include: vec!["**/*.ts".to_string()],
5382            exclude: Vec::new(),
5383            kind: TargetKind::Source,
5384        });
5385        let db = IndexDatabase::rebuild(&config).unwrap();
5386
5387        let edge = db
5388            .storage
5389            .connection()
5390            .query_row(
5391                "
5392                SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
5393                FROM edges
5394                WHERE edge_kind = 'uses_macro'
5395                  AND to_name = 'json'
5396                ",
5397                [],
5398                |row| {
5399                    Ok((
5400                        row.get::<_, String>(0)?,
5401                        row.get::<_, String>(1)?,
5402                        row.get::<_, Option<i64>>(2)?,
5403                        row.get::<_, String>(3)?,
5404                        row.get::<_, String>(4)?,
5405                        row.get::<_, Option<String>>(5)?,
5406                    ))
5407                },
5408            )
5409            .unwrap();
5410        assert_eq!(edge.0, "uses_macro");
5411        assert_eq!(edge.1, "json");
5412        assert_eq!(edge.2, None);
5413        assert_eq!(edge.3, "NameOnly");
5414        assert_eq!(edge.4, "unresolved");
5415        assert!(edge.5.as_deref().is_some_and(|value| value.contains("json!")));
5416
5417        fs::remove_dir_all(root).unwrap();
5418    }
5419
5420    #[test]
5421    fn qualified_crate_helper_callers_use_name_fallback() {
5422        let root = unique_temp_root();
5423        let _ = fs::remove_dir_all(&root);
5424        fs::create_dir_all(root.join("src")).unwrap();
5425        fs::write(
5426            root.join("src/lib.rs"),
5427            r#"
5428pub mod task_spawn {
5429    pub fn spawn_blocking() {}
5430}
5431
5432pub fn first() {
5433    crate::task_spawn::spawn_blocking();
5434}
5435
5436pub fn second() {
5437    task_spawn::spawn_blocking();
5438}
5439"#,
5440        )
5441        .unwrap();
5442        let config = source_config(root.clone(), Language::Rust);
5443        let db = IndexDatabase::rebuild(&config).unwrap();
5444
5445        let callers = db.find_callers("spawn_blocking", 10).unwrap();
5446        assert!(
5447            callers.iter().any(|edge| {
5448                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("first"))
5449                    && edge.edge_kind == "calls_name"
5450                    && edge.resolution == "target_name_fallback"
5451            }),
5452            "spawn_blocking callers: {callers:?}"
5453        );
5454        assert!(
5455            callers.iter().any(|edge| {
5456                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("second"))
5457                    && edge.edge_kind == "calls_name"
5458            }),
5459            "spawn_blocking callers: {callers:?}"
5460        );
5461
5462        fs::remove_dir_all(root).unwrap();
5463    }
5464
5465    #[test]
5466    fn caller_lookup_does_not_match_related_names_or_chain_evidence() {
5467        let root = unique_temp_root();
5468        let _ = fs::remove_dir_all(&root);
5469        fs::create_dir_all(root.join("src")).unwrap();
5470        fs::write(
5471            root.join("src/lib.rs"),
5472            r#"
5473pub mod runtime {
5474    pub mod task_spawn {
5475        pub fn spawn() {}
5476        pub fn spawn_blocking() -> JoinHandle {
5477            JoinHandle
5478        }
5479        pub fn spawn_blocking_handle() {}
5480        pub fn spawn_blocking_offload() -> JoinHandle {
5481            JoinHandle
5482        }
5483    }
5484}
5485
5486pub struct JoinHandle;
5487
5488impl JoinHandle {
5489    pub fn map_err(self) {}
5490}
5491
5492pub fn direct() {
5493    crate::runtime::task_spawn::spawn_blocking();
5494}
5495
5496pub fn related_handle() {
5497    crate::runtime::task_spawn::spawn_blocking_handle();
5498}
5499
5500pub fn related_offload_chain() {
5501    crate::runtime::task_spawn::spawn_blocking_offload().map_err();
5502}
5503
5504pub fn related_spawn_with_text() {
5505    crate::runtime::task_spawn::spawn();
5506}
5507"#,
5508        )
5509        .unwrap();
5510        let config = source_config(root.clone(), Language::Rust);
5511        let db = IndexDatabase::rebuild(&config).unwrap();
5512
5513        let callers = db.find_callers("spawn_blocking", 20).unwrap();
5514        assert!(
5515            callers.iter().any(|edge| {
5516                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
5517                    && edge.target.as_deref() == Some("spawn_blocking")
5518                    && edge.edge_kind == "calls_name"
5519            }),
5520            "spawn_blocking callers: {callers:?}"
5521        );
5522        assert!(
5523            callers.iter().all(|edge| {
5524                !edge.from_symbol.as_deref().is_some_and(|name| {
5525                    name.ends_with("related_handle")
5526                        || name.ends_with("related_offload_chain")
5527                        || name.ends_with("related_spawn_with_text")
5528                }) && !matches!(
5529                    edge.target.as_deref(),
5530                    Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
5531                )
5532            }),
5533            "caller lookup leaked related names or chain evidence: {callers:?}"
5534        );
5535
5536        let qualified_callers = db.find_callers("src/lib.rs::spawn_blocking", 20).unwrap();
5537        assert!(
5538            qualified_callers.iter().any(|edge| {
5539                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
5540                    && edge.target.as_deref() == Some("spawn_blocking")
5541                    && edge.edge_kind == "calls_name"
5542            }),
5543            "qualified spawn_blocking callers: {qualified_callers:?}"
5544        );
5545        assert!(
5546            qualified_callers.iter().all(|edge| {
5547                !edge.from_symbol.as_deref().is_some_and(|name| {
5548                    name.ends_with("related_handle")
5549                        || name.ends_with("related_offload_chain")
5550                        || name.ends_with("related_spawn_with_text")
5551                }) && !matches!(
5552                    edge.target.as_deref(),
5553                    Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
5554                )
5555            }),
5556            "qualified caller lookup leaked related names or chain evidence: {qualified_callers:?}"
5557        );
5558
5559        fs::remove_dir_all(root).unwrap();
5560    }
5561
5562    #[test]
5563    fn files_past_the_old_structural_cap_still_contribute_symbols_and_edges() {
5564        let root = unique_temp_root();
5565        let _ = fs::remove_dir_all(&root);
5566        fs::create_dir_all(root.join("src")).unwrap();
5567        let filler =
5568            (0..700).map(|idx| format!("pub fn filler_{idx}() {{}}\n")).collect::<String>();
5569        fs::write(
5570            root.join("src/lib.rs"),
5571            format!(
5572                r#"
5573pub mod task_spawn {{
5574    pub fn spawn_blocking() {{}}
5575}}
5576
5577{filler}
5578
5579pub fn caller() {{
5580    crate::task_spawn::spawn_blocking();
5581}}
5582"#
5583            ),
5584        )
5585        .unwrap();
5586        let config = source_config(root.clone(), Language::Rust);
5587        assert!(fs::metadata(root.join("src/lib.rs")).unwrap().len() > 10_000);
5588        let db = IndexDatabase::rebuild(&config).unwrap();
5589
5590        let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
5591        assert!(
5592            symbols.iter().any(|symbol| symbol.name == "caller"),
5593            "caller symbols: {symbols:?}"
5594        );
5595        let callers = db.find_callers("spawn_blocking", 10).unwrap();
5596        assert!(
5597            callers.iter().any(|edge| {
5598                edge.edge_kind == "calls_name"
5599                    && edge.target.as_deref() == Some("spawn_blocking")
5600                    && edge.callsite.as_ref().is_some_and(|callsite| callsite.line > 700)
5601            }),
5602            "spawn_blocking callers: {callers:?}"
5603        );
5604        let impact =
5605            db.impact_surface("callers of crate::task_spawn::spawn_blocking in src", 10).unwrap();
5606        assert!(
5607            impact.iter().any(|item| {
5608                item.category == "Direct structural impact" && item.reason == "direct_caller"
5609            }),
5610            "impact: {impact:?}"
5611        );
5612
5613        fs::remove_dir_all(root).unwrap();
5614    }
5615
5616    #[test]
5617    fn impact_surface_uses_high_signal_query_symbols_and_call_edges() {
5618        let root = unique_temp_root();
5619        let _ = fs::remove_dir_all(&root);
5620        fs::create_dir_all(root.join("src")).unwrap();
5621        fs::write(
5622            root.join("src/lib.rs"),
5623            r#"
5624pub mod runtime {
5625    pub fn unrelated_runtime_symbol() {}
5626}
5627
5628pub mod task_spawn {
5629    pub fn spawn_blocking<F, T>(f: F) -> T
5630    where
5631        F: FnOnce() -> T + Send + 'static,
5632        T: Send + 'static,
5633    {
5634        f()
5635    }
5636}
5637
5638pub fn caller() {
5639    crate::task_spawn::spawn_blocking(|| 1);
5640}
5641"#,
5642        )
5643        .unwrap();
5644        let config = source_config(root.clone(), Language::Rust);
5645        let db = IndexDatabase::rebuild(&config).unwrap();
5646        let impact = db
5647            .impact_surface(
5648                "change runtime task_spawn spawn_blocking wasm inline native blocking pool",
5649                20,
5650            )
5651            .unwrap();
5652        assert!(
5653            impact.iter().any(|item| {
5654                item.category == "Direct structural impact"
5655                    && item.reason == "direct_caller"
5656                    && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
5657            }),
5658            "spawn_blocking caller should be present: {impact:?}"
5659        );
5660        assert!(
5661            impact.iter().all(|item| {
5662                !(item.reason == "exact_symbol_definition"
5663                    && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("runtime")))
5664            }),
5665            "broad `runtime` token should not become an exact impact seed: {impact:?}"
5666        );
5667        assert!(
5668            impact.iter().all(|item| {
5669                !item.evidence.iter().any(|evidence| evidence.contains("references_type"))
5670                    && item.symbol.as_deref() != Some("Send")
5671            }),
5672            "type references should not appear as direct impact: {impact:?}"
5673        );
5674
5675        fs::remove_dir_all(root).unwrap();
5676    }
5677
5678    #[test]
5679    fn docs_for_symbol_prefers_local_source_context_before_broad_markdown() {
5680        let root = unique_temp_root();
5681        let _ = fs::remove_dir_all(&root);
5682        fs::create_dir_all(root.join("src/runtime")).unwrap();
5683        fs::create_dir_all(root.join("docs")).unwrap();
5684        fs::write(
5685            root.join("src/runtime/task_spawn.rs"),
5686            r#"
5687pub fn spawn_blocking<F, T>(f: F) -> T
5688where
5689    F: FnOnce() -> T + Send + 'static,
5690    T: Send + 'static,
5691{
5692    f()
5693}
5694"#,
5695        )
5696        .unwrap();
5697        fs::write(
5698            root.join("docs/phrase-persistence.md"),
5699            "# Phrase persistence\nUnrelated notes mention spawn_blocking in passing.\n",
5700        )
5701        .unwrap();
5702        fs::write(
5703            root.join("docs/task_spawn.md"),
5704            "# task_spawn\nLocal task_spawn notes explain spawn_blocking.\n",
5705        )
5706        .unwrap();
5707        let config = Config {
5708            root: root.clone(),
5709            database: root.join(".rag-rat/index.sqlite"),
5710            targets: vec![
5711                ResolvedTarget {
5712                    name: "rust".to_string(),
5713                    language: Language::Rust,
5714                    directories: vec![PathBuf::from("src")],
5715                    include: vec!["src/".to_string()],
5716                    exclude: Vec::new(),
5717                    kind: TargetKind::Source,
5718                },
5719                ResolvedTarget {
5720                    name: "markdown".to_string(),
5721                    language: Language::Markdown,
5722                    directories: vec![PathBuf::from("docs")],
5723                    include: vec!["**/*.md".to_string()],
5724                    exclude: Vec::new(),
5725                    kind: TargetKind::Docs,
5726                },
5727            ],
5728            local_ai: Default::default(),
5729        };
5730        let db = IndexDatabase::rebuild(&config).unwrap();
5731        let symbol = db.symbols("spawn_blocking", Some(Language::Rust), 10).unwrap().remove(0);
5732        let hits = db.docs_for_selected_symbol(&symbol, 10).unwrap();
5733        assert_eq!(hits[0].path, "src/runtime/task_spawn.rs", "docs hits: {hits:?}");
5734        let phrase_index = hits.iter().position(|hit| hit.path == "docs/phrase-persistence.md");
5735        let task_spawn_index = hits.iter().position(|hit| hit.path == "docs/task_spawn.md");
5736        assert!(
5737            phrase_index.is_none_or(|phrase| task_spawn_index.is_some_and(|local| local < phrase)),
5738            "path-local task_spawn docs should outrank unrelated phrase docs: {hits:?}"
5739        );
5740
5741        fs::remove_dir_all(root).unwrap();
5742    }
5743
5744    #[test]
5745    fn partial_tree_sitter_trees_still_contribute_valid_symbols_and_edges() {
5746        let root = unique_temp_root();
5747        let _ = fs::remove_dir_all(&root);
5748        fs::create_dir_all(root.join("src")).unwrap();
5749        fs::write(
5750            root.join("src/lib.rs"),
5751            r#"
5752pub fn helper() {}
5753
5754pub fn caller() {
5755    helper();
5756}
5757
5758fn broken( {
5759"#,
5760        )
5761        .unwrap();
5762        let config = source_config(root.clone(), Language::Rust);
5763        let db = IndexDatabase::rebuild(&config).unwrap();
5764
5765        let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
5766        assert!(
5767            symbols.iter().any(|symbol| symbol.name == "caller"),
5768            "caller symbols: {symbols:?}"
5769        );
5770        assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
5771
5772        fs::remove_dir_all(root).unwrap();
5773    }
5774
5775    #[test]
5776    fn receiver_method_calls_do_not_bind_to_same_named_free_functions() {
5777        let root = unique_temp_root();
5778        let _ = fs::remove_dir_all(&root);
5779        fs::create_dir_all(root.join("src")).unwrap();
5780        fs::write(
5781            root.join("src/lib.rs"),
5782            r#"
5783pub fn spawn_blocking() {}
5784
5785pub fn caller(joinset: JoinSet) {
5786    joinset.spawn_blocking();
5787}
5788
5789pub struct JoinSet;
5790"#,
5791        )
5792        .unwrap();
5793        let config = source_config(root.clone(), Language::Rust);
5794        let db = IndexDatabase::rebuild(&config).unwrap();
5795
5796        let edge = db
5797            .storage
5798            .connection()
5799            .query_row(
5800                "
5801                SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution, receiver_hint
5802                FROM edges
5803                WHERE from_name LIKE '%caller'
5804                  AND edge_kind = 'calls_name'
5805                  AND to_name = 'spawn_blocking'
5806                ",
5807                [],
5808                |row| {
5809                    Ok((
5810                        row.get::<_, String>(0)?,
5811                        row.get::<_, Option<String>>(1)?,
5812                        row.get::<_, Option<i64>>(2)?,
5813                        row.get::<_, String>(3)?,
5814                        row.get::<_, String>(4)?,
5815                        row.get::<_, Option<String>>(5)?,
5816                    ))
5817                },
5818            )
5819            .unwrap();
5820        assert_eq!(edge.0, "spawn_blocking");
5821        assert_eq!(edge.1.as_deref(), Some("joinset::spawn_blocking"));
5822        assert_eq!(edge.2, None);
5823        assert_eq!(edge.3, "NameOnly");
5824        assert_eq!(edge.4, "unresolved");
5825        assert_eq!(edge.5.as_deref(), Some("joinset"));
5826
5827        fs::remove_dir_all(root).unwrap();
5828    }
5829
5830    #[test]
5831    fn trace_callees_excludes_type_references_by_default() {
5832        let root = unique_temp_root();
5833        let _ = fs::remove_dir_all(&root);
5834        fs::create_dir_all(root.join("src")).unwrap();
5835        fs::write(
5836            root.join("src/lib.rs"),
5837            r#"
5838pub struct JoinError;
5839pub enum Result<T, E> { Ok(T), Err(E) }
5840pub fn helper() {}
5841
5842pub fn spawn_blocking<F, T>(f: F) -> Result<T, JoinError>
5843where
5844    F: FnOnce() -> T + Send + 'static,
5845    T: Send + 'static,
5846{
5847    helper();
5848    tokio::task::spawn_blocking(f)
5849}
5850"#,
5851        )
5852        .unwrap();
5853        let config = source_config(root.clone(), Language::Rust);
5854        let db = IndexDatabase::rebuild(&config).unwrap();
5855
5856        let default_callees = db.trace_callees("spawn_blocking", 20).unwrap();
5857        assert!(
5858            default_callees.iter().any(|edge| {
5859                edge.edge_kind == "calls_name"
5860                    && edge.target.as_deref() == Some("helper")
5861                    && edge.verified_target_symbol
5862            }),
5863            "default callees: {default_callees:?}"
5864        );
5865        assert!(
5866            default_callees
5867                .iter()
5868                .all(|edge| edge.target_qualified_name.as_deref()
5869                    != Some("tokio::task::spawn_blocking")),
5870            "default callees leaked unresolved external call: {default_callees:?}"
5871        );
5872        assert!(
5873            default_callees.iter().all(|edge| edge.edge_kind != "references_type"),
5874            "default callees leaked type refs: {default_callees:?}"
5875        );
5876        assert!(
5877            default_callees.iter().all(|edge| !matches!(
5878                edge.target.as_deref(),
5879                Some("F" | "T" | "Send" | "Result" | "JoinError")
5880            )),
5881            "default callees leaked generic/type targets: {default_callees:?}"
5882        );
5883
5884        let with_refs = db
5885            .trace_callees_with_options(
5886                "spawn_blocking",
5887                20,
5888                &crate::query::graph::GraphTraversalOptions {
5889                    include_references: true,
5890                    edge_kinds: None,
5891                    ..Default::default()
5892                },
5893            )
5894            .unwrap();
5895        assert!(
5896            with_refs.iter().any(|edge| edge.edge_kind == "references_type"),
5897            "reference-enabled callees: {with_refs:?}"
5898        );
5899
5900        let with_unresolved = db
5901            .trace_callees_with_options(
5902                "spawn_blocking",
5903                20,
5904                &crate::query::graph::GraphTraversalOptions {
5905                    include_unresolved: true,
5906                    ..Default::default()
5907                },
5908            )
5909            .unwrap();
5910        assert!(
5911            with_unresolved
5912                .iter()
5913                .any(|edge| edge.target_qualified_name.as_deref()
5914                    == Some("tokio::task::spawn_blocking")),
5915            "unresolved-enabled callees: {with_unresolved:?}"
5916        );
5917
5918        fs::remove_dir_all(root).unwrap();
5919    }
5920
5921    #[test]
5922    fn trace_callees_defaults_to_repo_relevant_calls() {
5923        let root = unique_temp_root();
5924        let _ = fs::remove_dir_all(&root);
5925        fs::create_dir_all(root.join("src")).unwrap();
5926        fs::write(
5927            root.join("src/lib.rs"),
5928            r#"
5929pub fn repo_helper() {}
5930
5931pub fn caller(input: Result<String, String>) -> String {
5932    repo_helper();
5933    let values: Vec<String> = Vec::new();
5934    let _ = input.map_err(|error| error.to_string());
5935    let _ = Some("value").unwrap_or_else(|| "fallback");
5936    let _ = format!("hello");
5937    values.get(0).unwrap_or_else(|| "fallback").to_string()
5938}
5939"#,
5940        )
5941        .unwrap();
5942        let config = source_config(root.clone(), Language::Rust);
5943        let db = IndexDatabase::rebuild(&config).unwrap();
5944
5945        let default_callees = db.trace_callees("caller", 20).unwrap();
5946        assert!(
5947            default_callees.iter().any(|edge| edge.target.as_deref() == Some("repo_helper")),
5948            "default callees should keep repo-local calls: {default_callees:?}"
5949        );
5950        assert!(
5951            default_callees.iter().all(|edge| {
5952                edge.edge_kind != "uses_macro"
5953                    && !matches!(
5954                        edge.target.as_deref(),
5955                        Some("new" | "map_err" | "unwrap_or_else" | "to_string" | "format")
5956                    )
5957            }),
5958            "default callees leaked low-signal calls: {default_callees:?}"
5959        );
5960
5961        let expanded = db
5962            .trace_callees_with_options(
5963                "caller",
5964                20,
5965                &crate::query::graph::GraphTraversalOptions {
5966                    include_unresolved: true,
5967                    include_macros: true,
5968                    include_common_methods: true,
5969                    ..Default::default()
5970                },
5971            )
5972            .unwrap();
5973        assert!(
5974            expanded.iter().any(|edge| edge.edge_kind == "uses_macro"),
5975            "macro-enabled callees: {expanded:?}"
5976        );
5977        assert!(
5978            expanded.iter().any(|edge| edge.target.as_deref() == Some("unwrap_or_else")),
5979            "common-method-enabled callees: {expanded:?}"
5980        );
5981
5982        fs::remove_dir_all(root).unwrap();
5983    }
5984
5985    #[test]
5986    fn indexes_kotlin_graph_edges_from_tree_sitter() {
5987        let root = unique_temp_root();
5988        let _ = fs::remove_dir_all(&root);
5989        fs::create_dir_all(root.join("src")).unwrap();
5990        fs::write(
5991            root.join("src/Main.kt"),
5992            r#"
5993package dev.cq27.test
5994
5995import dev.cq27.lib.ExternalThing
5996
5997interface Syncable
5998
5999class MainBridge : Syncable {
6000  suspend fun syncOnce() {
6001    helper()
6002    ExternalThing()
6003  }
6004}
6005
6006fun helper() {}
6007"#,
6008        )
6009        .unwrap();
6010        let config = source_config(root.clone(), Language::Kotlin);
6011        let db = IndexDatabase::rebuild(&config).unwrap();
6012
6013        assert_edge(&db, "syncOnce", "helper", "calls_name", "Syntactic");
6014        assert_edge(&db, "MainBridge", "Syncable", "implements", "Syntactic");
6015        assert_edge(&db, "src/Main.kt", "ExternalThing", "imports", "NameOnly");
6016        let impact = db.impact_surface("helper", 10).unwrap();
6017        assert!(
6018            impact.iter().any(|item| {
6019                item.category == "Direct structural impact" && item.reason == "direct_caller"
6020            }),
6021            "impact: {impact:?}"
6022        );
6023
6024        fs::remove_dir_all(root).unwrap();
6025    }
6026
6027    #[test]
6028    fn indexes_real_world_kotlin_graph_patterns() {
6029        let root = fixture_temp_root("graph-realworld/kotlin");
6030        let config = source_config(root.clone(), Language::Kotlin);
6031        let db = IndexDatabase::rebuild(&config).unwrap();
6032
6033        assert_edge(&db, "src/Main.kt", "ExternalFactory", "imports", "NameOnly");
6034        assert_edge(&db, "Worker", "companion", "contains", "Exact");
6035        assert_edge(&db, "companion", "create", "contains", "Exact");
6036        assert_edge(&db, "syncOnce", "create", "calls_name", "Syntactic");
6037        assert_edge(&db, "syncOnce", "Worker", "references_type", "Syntactic");
6038        assert_edge(&db, "syncOnce", "run", "calls_name", "Syntactic");
6039        assert_edge(&db, "syncOnce", "SingletonRunner", "references_type", "Syntactic");
6040        assert_edge(&db, "syncOnce", "ExternalFactory", "calls_name", "NameOnly");
6041        assert_edge(&db, "syncOnce", "ExternalFactory", "references_type", "NameOnly");
6042        assert_edge(&db, "syncOnce", "cleaned", "calls_name", "Syntactic");
6043        let callers = db.find_callers("cleaned", 10).unwrap();
6044        assert!(
6045            callers.iter().any(|edge| {
6046                edge.edge_kind == "calls_name"
6047                    && edge.edge_confidence == edge.confidence
6048                    && edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("syncOnce"))
6049            }),
6050            "cleaned callers: {callers:?}"
6051        );
6052
6053        fs::remove_dir_all(root).unwrap();
6054    }
6055
6056    #[test]
6057    fn kotlin_caller_lookup_respects_qualified_receivers_for_common_method_names() {
6058        let root = unique_temp_root();
6059        let _ = fs::remove_dir_all(&root);
6060        fs::create_dir_all(root.join("src")).unwrap();
6061        fs::write(
6062            root.join("src/Main.kt"),
6063            r#"
6064package dev.cq27.test
6065
6066object WatchProposalBuilder {
6067  fun build(): String = "proposal"
6068}
6069
6070class AndroidDialogBuilder {
6071  fun build(): String = "dialog"
6072}
6073
6074fun actualCaller() {
6075  WatchProposalBuilder.build()
6076}
6077
6078fun unrelatedBuilderCalls(dialog: AndroidDialogBuilder) {
6079  dialog.build()
6080  AndroidDialogBuilder().build()
6081}
6082"#,
6083        )
6084        .unwrap();
6085        let config = source_config(root.clone(), Language::Kotlin);
6086        let db = IndexDatabase::rebuild(&config).unwrap();
6087        let target = db
6088            .symbols("build", Some(Language::Kotlin), 10)
6089            .unwrap()
6090            .into_iter()
6091            .find(|symbol| symbol.qualified_name.contains("WatchProposalBuilder"))
6092            .expect("WatchProposalBuilder.build symbol");
6093        let callers = db
6094            .find_callers_with_options(
6095                "build",
6096                20,
6097                &crate::query::graph::GraphTraversalOptions {
6098                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
6099                    symbol_id: Some(target.symbol_id),
6100                    ..Default::default()
6101                },
6102            )
6103            .unwrap();
6104        assert_eq!(
6105            callers
6106                .iter()
6107                .filter(|edge| edge
6108                    .from_symbol
6109                    .as_deref()
6110                    .is_some_and(|name| name.ends_with("actualCaller")))
6111                .count(),
6112            1,
6113            "actual caller should be present once: {callers:?}"
6114        );
6115        assert!(
6116            callers.iter().all(|edge| edge
6117                .from_symbol
6118                .as_deref()
6119                .is_none_or(|name| !name.ends_with("unrelatedBuilderCalls"))),
6120            "unrelated builder calls should not resolve to WatchProposalBuilder.build: {callers:?}"
6121        );
6122
6123        fs::remove_dir_all(root).unwrap();
6124    }
6125
6126    #[test]
6127    fn github_sync_caches_papertrail_and_rationale_without_query_time_crawling() {
6128        let (root, config) =
6129            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
6130        let db = IndexDatabase::rebuild(&config).unwrap();
6131        let mock = MockGitHubClient;
6132
6133        let offline =
6134            github::sync_from_refs::<MockGitHubClient>(db.storage.connection(), &root, None, true)
6135                .unwrap();
6136        assert!(offline.offline);
6137        assert_eq!(offline.discovered_refs, 1);
6138        assert_eq!(offline.synced_items, 0);
6139
6140        let report =
6141            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6142        assert!(!report.offline);
6143        assert_eq!(report.discovered_refs, 1);
6144        assert_eq!(report.synced_items, 5);
6145        assert_eq!(report.status.issues, 1);
6146        assert_eq!(report.status.comments, 1);
6147        assert_eq!(report.status.pulls, 1);
6148        assert_eq!(report.status.reviews, 1);
6149        assert_eq!(report.status.review_comments, 1);
6150
6151        let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
6152        assert_eq!(issue_hits.len(), 1);
6153        assert_eq!(issue_hits[0].classification, "decision");
6154        assert_eq!(issue_hits[0].evidence_kind, "historical_github");
6155
6156        let refs = db.github_refs_for_path("docs/search.md", 10).unwrap();
6157        assert_eq!(refs.len(), 1);
6158        assert_eq!(refs[0].source_kind, "file");
6159
6160        let rationale = db.rationale_search("risk", 10).unwrap();
6161        assert!(rationale.iter().any(|item| item.classification == "risk"));
6162        let issue_ref_rationale = db.rationale_search("Fixes #42", 10).unwrap();
6163        assert_eq!(issue_ref_rationale.first().map(|item| item.number), Some(42));
6164        assert_eq!(
6165            issue_ref_rationale.first().map(|item| item.evidence_kind),
6166            Some("literal_github_ref")
6167        );
6168        assert_eq!(issue_ref_rationale.first().map(|item| item.score), Some(1.0));
6169        assert!(
6170            issue_ref_rationale.iter().any(|item| item.number == 42),
6171            "issue ref rationale should use structured GitHub refs: {issue_ref_rationale:?}"
6172        );
6173
6174        let chunk_id = first_chunk_id(&db);
6175        let papertrail = db.papertrail_for_chunk(chunk_id, 10).unwrap().unwrap();
6176        assert!(papertrail.current_source.is_some());
6177        assert!(!papertrail.github_evidence.is_empty());
6178        assert!(papertrail.github_evidence.iter().all(|item| {
6179            matches!(item.evidence_kind, "historical_github" | "literal_github_ref")
6180        }));
6181
6182        fs::remove_dir_all(root).unwrap();
6183    }
6184
6185    #[test]
6186    fn papertrail_for_commit_prefers_commit_sourced_github_refs() {
6187        let root = unique_temp_root();
6188        let _ = fs::remove_dir_all(&root);
6189        fs::create_dir_all(root.join("docs")).unwrap();
6190        run_git(&root, &["init"]);
6191        run_git(&root, &["config", "user.name", "Rag Rat"]);
6192        run_git(&root, &["config", "user.email", "rag@example.com"]);
6193        fs::write(root.join("docs/search.md"), "# Decision\nalpha\n").unwrap();
6194        run_git(&root, &["add", "."]);
6195        run_git(&root, &["commit", "-m", "Fix search rationale", "-m", "Fixes #42"]);
6196
6197        let config = markdown_config_for_root(root.clone());
6198        let db = IndexDatabase::rebuild(&config).unwrap();
6199        let commit = db
6200            .storage
6201            .connection()
6202            .query_row("SELECT hash FROM git_commits LIMIT 1", [], |row| row.get::<_, String>(0))
6203            .unwrap();
6204        let mock = MockGitHubClient;
6205        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6206
6207        let papertrail = db.papertrail_for_commit(&commit[..7], 10).unwrap();
6208        assert_eq!(papertrail.github_evidence.first().map(|item| item.number), Some(42));
6209        assert_eq!(
6210            papertrail.github_evidence.first().map(|item| item.evidence_kind),
6211            Some("literal_github_ref")
6212        );
6213        assert!(
6214            papertrail.fallback_github_evidence.is_empty(),
6215            "structured commit refs should suppress noisy fallback evidence: {papertrail:?}"
6216        );
6217
6218        fs::remove_dir_all(root).unwrap();
6219    }
6220
6221    #[test]
6222    fn papertrail_for_symbol_dedupes_duplicate_file_refs() {
6223        let root = unique_temp_root();
6224        let _ = fs::remove_dir_all(&root);
6225        fs::create_dir_all(root.join("src")).unwrap();
6226        fs::write(
6227            root.join("src/lib.rs"),
6228            "// First rationale (#42)\n// Second rationale (#42)\npub fn tracked_symbol() {}\n",
6229        )
6230        .unwrap();
6231        let config = source_config(root.clone(), Language::Rust);
6232        let db = IndexDatabase::rebuild(&config).unwrap();
6233        let mock = MockGitHubClient;
6234        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6235        let papertrail = db
6236            .papertrail_for_symbol("tracked_symbol", Some(Language::Rust), 10)
6237            .unwrap()
6238            .expect("tracked symbol papertrail");
6239
6240        assert_eq!(
6241            papertrail
6242                .github_evidence
6243                .iter()
6244                .filter(|item| item.number == 42 && item.item_kind == "issue")
6245                .count(),
6246            1,
6247            "duplicate #42 refs in one file should collapse to one issue evidence row: {papertrail:?}"
6248        );
6249
6250        fs::remove_dir_all(root).unwrap();
6251    }
6252
6253    #[test]
6254    fn github_sync_keeps_partial_cache_and_skips_synced_refs_after_404() {
6255        let (root, config) = markdown_config(
6256            "# Decision\nRefs cq27-dev/rag-rat#42 and cq27-dev/rag-rat#404\nwe will keep sqlite\n",
6257        );
6258        let db = IndexDatabase::rebuild(&config).unwrap();
6259        let mock = PartiallyFailingGitHubClient;
6260
6261        let report =
6262            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6263        assert_eq!(report.discovered_refs, 2);
6264        assert_eq!(report.synced_items, 5);
6265        assert_eq!(report.failed_refs, 1);
6266        assert_eq!(report.errors.len(), 1);
6267        assert_eq!(report.errors[0].number, 404);
6268        assert_eq!(report.errors[0].status, "not_found");
6269
6270        let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
6271        assert_eq!(issue_hits.len(), 1);
6272        assert_eq!(issue_hits[0].number, 42);
6273
6274        let second =
6275            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6276        assert_eq!(second.synced_items, 0);
6277        assert_eq!(second.skipped_refs, 2);
6278        assert_eq!(second.failed_refs, 0);
6279
6280        fs::remove_dir_all(root).unwrap();
6281    }
6282
6283    #[test]
6284    fn search_recovers_when_fts_is_marked_dirty() {
6285        let (root, config) = markdown_config("alpha token");
6286        let db = IndexDatabase::rebuild(&config).unwrap();
6287        db.mark_fts_dirty().unwrap();
6288
6289        let dirty = db.status(&config.database).unwrap();
6290        assert!(dirty.fts_dirty);
6291        assert!(!dirty.fts_fresh);
6292
6293        let hits = db.search("alpha", 10, false).unwrap();
6294        assert_eq!(hits.len(), 1);
6295        assert_eq!(hits[0].summary, "alpha token");
6296        let fresh = db.status(&config.database).unwrap();
6297        assert!(!fresh.fts_dirty);
6298        assert!(fresh.fts_fresh);
6299
6300        fs::remove_dir_all(root).unwrap();
6301    }
6302
6303    #[test]
6304    fn read_chunk_relocates_small_line_drift_to_current_text() {
6305        let (root, config) = markdown_config("# Title\nalpha token\n");
6306        let db = IndexDatabase::rebuild(&config).unwrap();
6307        let chunk_id = first_chunk_id(&db);
6308        fs::write(root.join("docs/search.md"), "inserted\n# Title\nalpha token\n").unwrap();
6309
6310        let chunk = db.read_chunk(chunk_id).unwrap().unwrap();
6311        assert_eq!(chunk.start_line, 2);
6312        assert_eq!(chunk.end_line, 3);
6313        assert_eq!(chunk.text, "# Title\nalpha token\n");
6314
6315        fs::remove_dir_all(root).unwrap();
6316    }
6317
6318    #[test]
6319    fn read_chunk_large_drift_reindexes_and_reports_stale_chunk() {
6320        let (root, config) = markdown_config("# Title\nalpha token\n");
6321        let db = IndexDatabase::rebuild(&config).unwrap();
6322        let chunk_id = first_chunk_id(&db);
6323        fs::write(root.join("docs/search.md"), "# Replacement\nbeta token\n").unwrap();
6324
6325        let err = db.read_chunk(chunk_id).unwrap_err().to_string();
6326        assert!(err.contains("StaleChunk"), "{err}");
6327        let hits = db.search("beta", 10, false).unwrap();
6328        assert_eq!(hits.len(), 1);
6329        assert!(db.search("alpha", 10, false).unwrap().is_empty());
6330
6331        fs::remove_dir_all(root).unwrap();
6332    }
6333
6334    #[test]
6335    fn search_retries_after_healing_stale_hit() {
6336        let (root, config) = markdown_config("# Title\nalpha token\n");
6337        let db = IndexDatabase::rebuild(&config).unwrap();
6338        fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
6339
6340        let hits = db.search("alpha", 10, false).unwrap();
6341        assert!(hits.is_empty());
6342        let beta_hits = db.search("beta", 10, false).unwrap();
6343        assert_eq!(beta_hits.len(), 1);
6344        assert!(beta_hits[0].summary.contains("beta"));
6345
6346        fs::remove_dir_all(root).unwrap();
6347    }
6348
6349    #[test]
6350    fn search_heals_relocated_hits_before_returning_line_spans() {
6351        let (root, config) = markdown_config("# Title\nalpha token\n");
6352        let db = IndexDatabase::rebuild(&config).unwrap();
6353        fs::write(root.join("docs/search.md"), "inserted\n# Title\nalpha token\n").unwrap();
6354
6355        let hits = db.search("alpha", 10, false).unwrap();
6356        assert_eq!(hits.len(), 1);
6357        assert_eq!(hits[0].start_line, 2);
6358        assert_eq!(hits[0].end_line, 3);
6359        assert!(hits[0].summary.contains("alpha"));
6360
6361        fs::remove_dir_all(root).unwrap();
6362    }
6363
6364    #[test]
6365    fn read_chunk_deleted_source_reports_gone() {
6366        let (root, config) = markdown_config("# Title\nalpha token\n");
6367        let db = IndexDatabase::rebuild(&config).unwrap();
6368        let chunk_id = first_chunk_id(&db);
6369        fs::remove_file(root.join("docs/search.md")).unwrap();
6370
6371        let err = db.read_chunk(chunk_id).unwrap_err().to_string();
6372        assert!(err.contains("Gone"), "{err}");
6373        assert!(db.search("alpha", 10, false).unwrap().is_empty());
6374
6375        fs::remove_dir_all(root).unwrap();
6376    }
6377
6378    #[test]
6379    fn search_returns_needs_reindex_when_heal_cap_is_exceeded() {
6380        let root = unique_temp_root();
6381        let _ = fs::remove_dir_all(&root);
6382        let docs = root.join("docs");
6383        fs::create_dir_all(&docs).unwrap();
6384        for index in 0..=MAX_AUTO_HEAL_FILES_PER_CALL {
6385            fs::write(docs.join(format!("doc-{index}.md")), "common stale token\n").unwrap();
6386        }
6387        let config = markdown_config_for_root(root.clone());
6388        let db = IndexDatabase::rebuild(&config).unwrap();
6389        for index in 0..=MAX_AUTO_HEAL_FILES_PER_CALL {
6390            fs::write(docs.join(format!("doc-{index}.md")), "fresh replacement token\n").unwrap();
6391        }
6392
6393        let err = db.search("common", 20, false).unwrap_err().to_string();
6394        assert!(err.contains("needs_reindex"), "{err}");
6395
6396        fs::remove_dir_all(root).unwrap();
6397    }
6398
6399    #[test]
6400    fn heal_index_limit_does_not_warn_when_only_fresh_files_are_skipped() {
6401        let root = unique_temp_root();
6402        let _ = fs::remove_dir_all(&root);
6403        let docs = root.join("docs");
6404        fs::create_dir_all(&docs).unwrap();
6405        fs::write(docs.join("one.md"), "one fresh token\n").unwrap();
6406        fs::write(docs.join("two.md"), "two fresh token\n").unwrap();
6407        let config = markdown_config_for_root(root.clone());
6408        let db = IndexDatabase::rebuild(&config).unwrap();
6409
6410        let report = db.heal_index(Some(1)).unwrap();
6411
6412        assert_eq!(report.healed_files, 0);
6413        assert_eq!(report.removed_files, 0);
6414        assert_eq!(report.skipped_files, 2);
6415        assert_eq!(report.message, None);
6416
6417        fs::remove_dir_all(root).unwrap();
6418    }
6419
6420    #[test]
6421    fn search_recovers_when_fts_revision_is_stale() {
6422        let (root, config) = markdown_config("alpha token");
6423        let db = IndexDatabase::rebuild(&config).unwrap();
6424        db.set_meta("fts_source_revision", "stale").unwrap();
6425
6426        let stale = db.status(&config.database).unwrap();
6427        assert!(!stale.fts_dirty);
6428        assert!(!stale.fts_fresh);
6429
6430        let hits = db.search("alpha", 10, false).unwrap();
6431        assert_eq!(hits.len(), 1);
6432        let fresh = db.status(&config.database).unwrap();
6433        assert_eq!(fresh.fts_source_revision.as_deref(), Some(fresh.content_revision.as_str()));
6434        assert!(fresh.fts_fresh);
6435
6436        fs::remove_dir_all(root).unwrap();
6437    }
6438
6439    #[test]
6440    fn parser_failures_report_paths() {
6441        let root = unique_temp_root();
6442        let _ = fs::remove_dir_all(&root);
6443        let src = root.join("src");
6444        fs::create_dir_all(&src).unwrap();
6445        fs::write(src.join("broken.rs"), "pub fn broken(").unwrap();
6446        let config = Config {
6447            root: root.clone(),
6448            database: root.join(".rag-rat/index.sqlite"),
6449            targets: vec![ResolvedTarget {
6450                name: "rust".to_string(),
6451                language: Language::Rust,
6452                directories: vec![PathBuf::from("src")],
6453                include: vec!["**/*.rs".to_string()],
6454                exclude: Vec::new(),
6455                kind: TargetKind::Source,
6456            }],
6457            local_ai: Default::default(),
6458        };
6459
6460        let db = IndexDatabase::rebuild(&config).unwrap();
6461        let status = db.status(&config.database).unwrap();
6462        assert_eq!(status.parser_failures, 1);
6463        assert_eq!(status.parser_failure_paths[0].path, "src/broken.rs");
6464
6465        fs::remove_dir_all(root).unwrap();
6466    }
6467
6468    #[test]
6469    fn repo_memory_bound_to_logical_symbol_surfaces_in_symbol_chunk_and_impact() {
6470        let root = unique_temp_root();
6471        let _ = fs::remove_dir_all(&root);
6472        fs::create_dir_all(root.join("src")).unwrap();
6473        fs::write(
6474            root.join("src/lib.rs"),
6475            "#[cfg(unix)]\npub fn cfg_helper() {}\n#[cfg(windows)]\npub fn cfg_helper() {}\n",
6476        )
6477        .unwrap();
6478        let config = source_config(root.clone(), Language::Rust);
6479        let db = IndexDatabase::rebuild(&config).unwrap();
6480        let symbol = db
6481            .select_symbol(&crate::query::symbol::SymbolSelector {
6482                logical_symbol_id: None,
6483                symbol_id: None,
6484                symbol_path: None,
6485                symbol: Some("cfg_helper".to_string()),
6486                language: Some(Language::Rust),
6487                allow_ambiguous: true,
6488                limit: 10,
6489            })
6490            .unwrap()
6491            .unwrap()
6492            .expect("selected symbol");
6493        let logical_symbol_id = symbol.logical_symbol_id.expect("logical symbol id");
6494
6495        let created = db
6496            .memory_create(crate::query::memory::RepoMemoryCreate {
6497                kind: "Invariant".to_string(),
6498                title: "Treat cfg helper variants as one logical helper".to_string(),
6499                body: "Caller and impact analysis should use the logical symbol, not one cfg body variant."
6500                    .to_string(),
6501                confidence: "high".to_string(),
6502                created_by: Some("test-agent".to_string()),
6503                source: Some("agent".to_string()),
6504                tags: vec!["cfg".to_string(), "graph".to_string()],
6505                bind: crate::query::memory::RepoMemoryBindTarget {
6506                    logical_symbol_id: Some(logical_symbol_id),
6507                    symbol_id: None,
6508                    chunk_id: None,
6509                    edge_id: None,
6510                    path: None,
6511                    start_line: None,
6512                    end_line: None,
6513                    commit_hash: None,
6514                    github_owner: None,
6515                    github_repo: None,
6516                    github_number: None,
6517                    start_logical_symbol_id: None,
6518                    end_logical_symbol_id: None,
6519                    edge_sequence_hash: None,
6520                    path_summary: None,
6521                },
6522            })
6523            .unwrap();
6524        assert!(!created.duplicate);
6525        assert_eq!(created.memory.bindings[0].binding_kind, "logical_symbol");
6526
6527        let memories = db.memory_for_symbol(&symbol, 10).unwrap();
6528        assert_eq!(memories.len(), 1);
6529        assert_eq!(memories[0].kind, "Invariant");
6530        let chunk_id = memories[0].bindings[0].chunk_id.expect("bound chunk");
6531        let chunk = db.read_chunk(chunk_id).unwrap().expect("memory chunk");
6532        assert_eq!(chunk.memories.len(), 1);
6533        assert_eq!(chunk.memories[0].memory_id, created.memory.memory_id);
6534
6535        let impact = db
6536            .impact_surface_report_for_selected_symbol(
6537                &symbol,
6538                10,
6539                &crate::query::impact::ImpactSurfaceOptions::default(),
6540            )
6541            .unwrap();
6542        assert_eq!(impact.repo_memories.direct.len(), 1);
6543        assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);
6544        assert_eq!(impact.completeness_and_caveats.memory_status.stale, 0);
6545
6546        fs::remove_dir_all(root).unwrap();
6547    }
6548
6549    #[test]
6550    fn repo_memory_validate_marks_changed_or_missing_anchors_non_current() {
6551        let root = unique_temp_root();
6552        let _ = fs::remove_dir_all(&root);
6553        fs::create_dir_all(root.join("src")).unwrap();
6554        fs::write(root.join("src/lib.rs"), "pub fn anchored_memory() {}\n").unwrap();
6555        let config = source_config(root.clone(), Language::Rust);
6556        let db = IndexDatabase::rebuild(&config).unwrap();
6557        let symbol = db
6558            .select_symbol(&crate::query::symbol::SymbolSelector {
6559                logical_symbol_id: None,
6560                symbol_id: None,
6561                symbol_path: None,
6562                symbol: Some("anchored_memory".to_string()),
6563                language: Some(Language::Rust),
6564                allow_ambiguous: false,
6565                limit: 10,
6566            })
6567            .unwrap()
6568            .unwrap()
6569            .expect("selected symbol");
6570        let chunk_id = db
6571            .storage
6572            .connection()
6573            .query_row(
6574                "
6575                SELECT chunks.id
6576                FROM chunks
6577                JOIN files ON files.id = chunks.file_id
6578                WHERE files.path = ?1 AND chunks.symbol_path = ?2
6579                LIMIT 1
6580                ",
6581                params![symbol.path, symbol.qualified_name],
6582                |row| row.get::<_, i64>(0),
6583            )
6584            .unwrap();
6585        let created = db
6586            .memory_create(crate::query::memory::RepoMemoryCreate {
6587                kind: "Risk".to_string(),
6588                title: "Anchor must become stale when source hash changes".to_string(),
6589                body: "Validation should separate stale memories from current repo evidence."
6590                    .to_string(),
6591                confidence: "medium".to_string(),
6592                created_by: Some("test-agent".to_string()),
6593                source: Some("agent".to_string()),
6594                tags: Vec::new(),
6595                bind: crate::query::memory::RepoMemoryBindTarget {
6596                    logical_symbol_id: None,
6597                    symbol_id: None,
6598                    chunk_id: Some(chunk_id),
6599                    edge_id: None,
6600                    path: None,
6601                    start_line: None,
6602                    end_line: None,
6603                    commit_hash: None,
6604                    github_owner: None,
6605                    github_repo: None,
6606                    github_number: None,
6607                    start_logical_symbol_id: None,
6608                    end_logical_symbol_id: None,
6609                    edge_sequence_hash: None,
6610                    path_summary: None,
6611                },
6612            })
6613            .unwrap();
6614
6615        db.storage
6616            .connection()
6617            .execute("UPDATE chunks SET text_hash = 'changed' WHERE id = ?1", [chunk_id])
6618            .unwrap();
6619        let report = db.memory_validate().unwrap();
6620        assert_eq!(report.stale, 1);
6621        let stale = db.memory_for_symbol(&symbol, 10).unwrap();
6622        assert_eq!(stale[0].memory_id, created.memory.memory_id);
6623        assert_eq!(stale[0].bindings[0].anchor_status, "stale");
6624
6625        db.storage.connection().execute("DELETE FROM chunks WHERE id = ?1", [chunk_id]).unwrap();
6626        let report = db.memory_validate().unwrap();
6627        assert_eq!(report.gone, 1);
6628        let gone = db.memory_for_symbol(&symbol, 10).unwrap();
6629        assert_eq!(gone[0].bindings[0].anchor_status, "gone");
6630
6631        fs::remove_dir_all(root).unwrap();
6632    }
6633
6634    #[test]
6635    fn repo_memory_bound_to_edge_surfaces_when_impact_crosses_call_path() {
6636        let root = unique_temp_root();
6637        let _ = fs::remove_dir_all(&root);
6638        fs::create_dir_all(root.join("src")).unwrap();
6639        fs::write(
6640            root.join("src/lib.rs"),
6641            "pub fn target_edge() {}\npub fn caller_edge() {\n    target_edge();\n}\n",
6642        )
6643        .unwrap();
6644        let config = source_config(root.clone(), Language::Rust);
6645        let db = IndexDatabase::rebuild(&config).unwrap();
6646        let target = db
6647            .select_symbol(&crate::query::symbol::SymbolSelector {
6648                logical_symbol_id: None,
6649                symbol_id: None,
6650                symbol_path: None,
6651                symbol: Some("target_edge".to_string()),
6652                language: Some(Language::Rust),
6653                allow_ambiguous: false,
6654                limit: 10,
6655            })
6656            .unwrap()
6657            .unwrap()
6658            .expect("selected target");
6659        let graph_options = crate::query::graph::GraphTraversalOptions {
6660            resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
6661            symbol_id: Some(target.symbol_id),
6662            logical_symbol_id: target.logical_symbol_id,
6663            ..Default::default()
6664        };
6665        let callers =
6666            db.graph_traversal_report("find_callers", &target, true, 10, &graph_options).unwrap();
6667        let edge_id = callers.results[0].edge_id;
6668
6669        let edge_memory = db
6670            .memory_create(crate::query::memory::RepoMemoryCreate {
6671                kind: "Risk".to_string(),
6672                title: "caller_edge to target_edge must stay synchronous".to_string(),
6673                body: "This specific call path is used to prove edge-bound memories surface when impact crosses the edge."
6674                    .to_string(),
6675                confidence: "high".to_string(),
6676                created_by: Some("test-agent".to_string()),
6677                source: Some("agent".to_string()),
6678                tags: vec!["edge".to_string()],
6679                bind: crate::query::memory::RepoMemoryBindTarget {
6680                    logical_symbol_id: None,
6681                    symbol_id: None,
6682                    chunk_id: None,
6683                    edge_id: Some(edge_id),
6684                    path: None,
6685                    start_line: None,
6686                    end_line: None,
6687                    commit_hash: None,
6688                    github_owner: None,
6689                    github_repo: None,
6690                    github_number: None,
6691                    start_logical_symbol_id: None,
6692                    end_logical_symbol_id: None,
6693                    edge_sequence_hash: None,
6694                    path_summary: None,
6695                },
6696            })
6697            .unwrap();
6698        assert_eq!(edge_memory.memory.bindings[0].binding_kind, "edge");
6699        assert_eq!(edge_memory.memory.bindings[0].edge_id, Some(edge_id));
6700
6701        let impact = db
6702            .impact_surface_report_for_selected_symbol(
6703                &target,
6704                10,
6705                &crate::query::impact::ImpactSurfaceOptions {
6706                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
6707                    ..Default::default()
6708                },
6709            )
6710            .unwrap();
6711        assert!(impact.repo_memories.direct.is_empty());
6712        assert_eq!(impact.repo_memories.path_crossed.len(), 1);
6713        assert_eq!(impact.repo_memories.path_crossed[0].memory_id, edge_memory.memory.memory_id);
6714        assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);
6715
6716        let call_path_memory = db
6717            .memory_create(crate::query::memory::RepoMemoryCreate {
6718                kind: "TestExpectation".to_string(),
6719                title: "caller_edge path hash recall".to_string(),
6720                body: "Call-path memories are addressable by a deterministic edge sequence hash."
6721                    .to_string(),
6722                confidence: "medium".to_string(),
6723                created_by: Some("test-agent".to_string()),
6724                source: Some("agent".to_string()),
6725                tags: vec!["call-path".to_string()],
6726                bind: crate::query::memory::RepoMemoryBindTarget {
6727                    logical_symbol_id: None,
6728                    symbol_id: None,
6729                    chunk_id: None,
6730                    edge_id: None,
6731                    path: None,
6732                    start_line: None,
6733                    end_line: None,
6734                    commit_hash: None,
6735                    github_owner: None,
6736                    github_repo: None,
6737                    github_number: None,
6738                    start_logical_symbol_id: target.logical_symbol_id,
6739                    end_logical_symbol_id: target.logical_symbol_id,
6740                    edge_sequence_hash: Some("edge-sequence-test-hash".to_string()),
6741                    path_summary: Some("caller_edge -> target_edge".to_string()),
6742                },
6743            })
6744            .unwrap();
6745        let call_path = db.memory_for_call_path_hash("edge-sequence-test-hash", 10).unwrap();
6746        assert_eq!(call_path.len(), 1);
6747        assert_eq!(call_path[0].memory_id, call_path_memory.memory.memory_id);
6748        assert_eq!(call_path[0].call_paths[0].path_summary, "caller_edge -> target_edge");
6749
6750        fs::remove_dir_all(root).unwrap();
6751    }
6752
6753    #[test]
6754    fn repo_brief_ranks_churn_and_god_module_candidates() {
6755        let root = unique_temp_root();
6756        let _ = fs::remove_dir_all(&root);
6757        fs::create_dir_all(root.join("src")).unwrap();
6758        run_git(&root, &["init"]);
6759        run_git(&root, &["config", "user.name", "Rag Rat"]);
6760        run_git(&root, &["config", "user.email", "rag@example.com"]);
6761
6762        fs::write(root.join("src/stable.rs"), "pub fn stable() -> i32 { 1 }\n").unwrap();
6763        fs::write(root.join("src/hot.rs"), hot_module_text(0)).unwrap();
6764        run_git(&root, &["add", "."]);
6765        run_git(&root, &["commit", "-m", "Add initial modules"]);
6766
6767        for revision in 1..=3 {
6768            fs::write(root.join("src/hot.rs"), hot_module_text(revision)).unwrap();
6769            run_git(&root, &["add", "src/hot.rs"]);
6770            run_git(&root, &["commit", "-m", "Iterate hot module"]);
6771        }
6772
6773        let config = Config {
6774            root: root.clone(),
6775            database: root.join(".rag-rat/index.sqlite"),
6776            targets: vec![ResolvedTarget {
6777                name: "rust".to_string(),
6778                language: Language::Rust,
6779                directories: vec![PathBuf::from("src")],
6780                include: vec!["**/*.rs".to_string()],
6781                exclude: Vec::new(),
6782                kind: TargetKind::Source,
6783            }],
6784            local_ai: Default::default(),
6785        };
6786        let db = IndexDatabase::rebuild(&config).unwrap();
6787
6788        let churn = db
6789            .repo_brief(crate::query::repo_brief::RepoBriefOptions {
6790                mode: crate::query::repo_brief::RepoBriefMode::Churn,
6791                limit: 1,
6792                include_generated: false,
6793                include_memories: true,
6794            })
6795            .unwrap();
6796        assert_eq!(churn.candidates[0].path, "src/hot.rs");
6797        assert_eq!(churn.candidates[0].category, "recent_churn_hotspot");
6798        assert!(churn.candidates[0].score <= 1.0);
6799        assert!(churn.candidates[0].metrics.commit_touch_count >= 4);
6800        assert!(churn.candidates[0].why.iter().any(|reason| reason.contains("churn")));
6801
6802        let god_modules = db
6803            .repo_brief(crate::query::repo_brief::RepoBriefOptions {
6804                mode: crate::query::repo_brief::RepoBriefMode::GodModules,
6805                limit: 1,
6806                include_generated: false,
6807                include_memories: true,
6808            })
6809            .unwrap();
6810        assert_eq!(god_modules.candidates[0].path, "src/hot.rs");
6811        assert!(god_modules.candidates[0].score <= 1.0);
6812        assert!(god_modules.candidates[0].metrics.symbol_count >= 30);
6813        assert!(!god_modules.candidates[0].split_hints.is_empty());
6814        assert!(
6815            god_modules.candidates[0].next_tools.iter().any(|tool| tool.tool == "impact_surface")
6816        );
6817
6818        fs::remove_dir_all(root).unwrap();
6819    }
6820
6821    #[test]
6822    fn repo_clusters_groups_cotouched_files() {
6823        let root = unique_temp_root();
6824        let _ = fs::remove_dir_all(&root);
6825        fs::create_dir_all(root.join("src/sync")).unwrap();
6826        fs::create_dir_all(root.join("src/ui")).unwrap();
6827        run_git(&root, &["init"]);
6828        run_git(&root, &["config", "user.name", "Rag Rat"]);
6829        run_git(&root, &["config", "user.email", "rag@example.com"]);
6830
6831        fs::write(root.join("src/sync/actor.rs"), "pub fn sync_actor() -> i32 { 1 }\n").unwrap();
6832        fs::write(root.join("src/sync/msg.rs"), "pub fn sync_msg() -> i32 { 2 }\n").unwrap();
6833        fs::write(root.join("src/ui/app.rs"), "pub fn ui_app() -> i32 { 3 }\n").unwrap();
6834        run_git(&root, &["add", "."]);
6835        run_git(&root, &["commit", "-m", "Add modules"]);
6836
6837        for revision in 1..=2 {
6838            fs::write(
6839                root.join("src/sync/actor.rs"),
6840                format!("pub fn sync_actor() -> i32 {{ {revision} }}\n"),
6841            )
6842            .unwrap();
6843            fs::write(
6844                root.join("src/sync/msg.rs"),
6845                format!("pub fn sync_msg() -> i32 {{ {} }}\n", revision + 10),
6846            )
6847            .unwrap();
6848            run_git(&root, &["add", "src/sync/actor.rs", "src/sync/msg.rs"]);
6849            run_git(&root, &["commit", "-m", "Iterate sync modules"]);
6850        }
6851
6852        let config = Config {
6853            root: root.clone(),
6854            database: root.join(".rag-rat/index.sqlite"),
6855            targets: vec![ResolvedTarget {
6856                name: "rust".to_string(),
6857                language: Language::Rust,
6858                directories: vec![PathBuf::from("src")],
6859                include: vec!["**/*.rs".to_string()],
6860                exclude: Vec::new(),
6861                kind: TargetKind::Source,
6862            }],
6863            local_ai: Default::default(),
6864        };
6865        let db = IndexDatabase::rebuild(&config).unwrap();
6866
6867        let clusters = db
6868            .repo_clusters(crate::query::clusters::RepoClustersOptions {
6869                limit: 5,
6870                include_generated: false,
6871                include_memories: true,
6872                min_cluster_size: 2,
6873            })
6874            .unwrap();
6875
6876        let sync_cluster = clusters
6877            .clusters
6878            .iter()
6879            .find(|cluster| cluster.name == "src/sync")
6880            .expect("sync cluster");
6881        assert!(sync_cluster.representative_paths.contains(&"src/sync/actor.rs".to_string()));
6882        assert!(sync_cluster.representative_paths.contains(&"src/sync/msg.rs".to_string()));
6883        assert!(sync_cluster.metrics.co_touch_edges >= 2);
6884
6885        fs::remove_dir_all(root).unwrap();
6886    }
6887
6888    fn hot_module_text(revision: usize) -> String {
6889        let mut text = String::new();
6890        text.push_str("pub fn entry() -> i32 {\n");
6891        for i in 0..32 {
6892            text.push_str(&format!("    helper_{i}() +\n"));
6893        }
6894        text.push_str(&format!("    {revision}\n}}\n"));
6895        for i in 0..32 {
6896            text.push_str(&format!("pub fn helper_{i}() -> i32 {{ {i} }}\n"));
6897        }
6898        text
6899    }
6900
6901    fn unique_temp_root() -> PathBuf {
6902        let mut root = std::env::temp_dir();
6903        let suffix = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
6904        root.push(format!("rag-rat-schema-test-{}-{}-{suffix}", std::process::id(), now_ms()));
6905        root
6906    }
6907
6908    fn fixture_temp_root(fixture: &str) -> PathBuf {
6909        let root = unique_temp_root();
6910        let _ = fs::remove_dir_all(&root);
6911        let fixture_root =
6912            PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/fixtures").join(fixture);
6913        copy_fixture_dir(&fixture_root, &root);
6914        root
6915    }
6916
6917    fn copy_fixture_dir(from: &Path, to: &Path) {
6918        fs::create_dir_all(to).unwrap();
6919        for entry in fs::read_dir(from).unwrap() {
6920            let entry = entry.unwrap();
6921            let from_path = entry.path();
6922            let to_path = to.join(entry.file_name());
6923            if from_path.is_dir() {
6924                copy_fixture_dir(&from_path, &to_path);
6925            } else {
6926                fs::copy(&from_path, &to_path).unwrap();
6927            }
6928        }
6929    }
6930
6931    fn markdown_config(text: &str) -> (PathBuf, Config) {
6932        let root = unique_temp_root();
6933        let _ = fs::remove_dir_all(&root);
6934        let docs = root.join("docs");
6935        fs::create_dir_all(&docs).unwrap();
6936        fs::write(docs.join("search.md"), text).unwrap();
6937        let config = markdown_config_for_root(root.clone());
6938        (root, config)
6939    }
6940
6941    fn markdown_config_for_root(root: PathBuf) -> Config {
6942        Config {
6943            root: root.clone(),
6944            database: root.join(".rag-rat/index.sqlite"),
6945            targets: vec![ResolvedTarget {
6946                name: "markdown".to_string(),
6947                language: Language::Markdown,
6948                directories: vec![PathBuf::from("docs")],
6949                include: vec!["**/*.md".to_string()],
6950                exclude: Vec::new(),
6951                kind: TargetKind::Docs,
6952            }],
6953            local_ai: Default::default(),
6954        }
6955    }
6956
6957    fn source_config(root: PathBuf, language: Language) -> Config {
6958        Config {
6959            root: root.clone(),
6960            database: root.join(".rag-rat/index.sqlite"),
6961            targets: vec![ResolvedTarget {
6962                name: language.as_str().to_string(),
6963                language,
6964                directories: vec![PathBuf::from("src")],
6965                include: vec!["src/".to_string()],
6966                exclude: Vec::new(),
6967                kind: TargetKind::Source,
6968            }],
6969            local_ai: Default::default(),
6970        }
6971    }
6972
6973    fn assert_edge(db: &IndexDatabase, from: &str, to: &str, edge_kind: &str, confidence: &str) {
6974        let count = db
6975            .storage
6976            .connection()
6977            .query_row(
6978                "
6979                SELECT COUNT(*)
6980                FROM edges
6981                WHERE edge_kind = ?1
6982                  AND confidence = ?2
6983                  AND COALESCE(from_name, '') LIKE ?3
6984                  AND to_name LIKE ?4
6985                ",
6986                params![edge_kind, confidence, format!("%{from}%"), format!("%{to}%")],
6987                |row| row.get::<_, i64>(0),
6988            )
6989            .unwrap();
6990        assert!(count > 0, "missing edge {from} -[{edge_kind}/{confidence}]-> {to}");
6991    }
6992
6993    fn table_count(db: &IndexDatabase, table: &str) -> i64 {
6994        db.storage
6995            .connection()
6996            .query_row("SELECT COUNT(*) FROM sqlite_master WHERE name = ?1", [table], |row| {
6997                row.get(0)
6998            })
6999            .unwrap()
7000    }
7001
7002    fn row_count(db: &IndexDatabase, table: &str) -> i64 {
7003        db.storage
7004            .connection()
7005            .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| row.get(0))
7006            .unwrap()
7007    }
7008
7009    fn chunk_columns(db: &IndexDatabase) -> Vec<String> {
7010        table_columns(db, "chunks")
7011    }
7012
7013    fn file_columns(db: &IndexDatabase) -> Vec<String> {
7014        table_columns(db, "files")
7015    }
7016
7017    fn table_columns(db: &IndexDatabase, table: &str) -> Vec<String> {
7018        let mut stmt =
7019            db.storage.connection().prepare(&format!("PRAGMA table_info({table})")).unwrap();
7020        stmt.query_map([], |row| row.get::<_, String>(1)).unwrap().map(Result::unwrap).collect()
7021    }
7022
7023    fn indexed_revision_count(db: &IndexDatabase) -> i64 {
7024        db.storage
7025            .connection()
7026            .query_row("SELECT COUNT(*) FROM files WHERE indexed_revision != ''", [], |row| {
7027                row.get(0)
7028            })
7029            .unwrap()
7030    }
7031
7032    fn chunk_source_revision_count(db: &IndexDatabase) -> i64 {
7033        db.storage
7034            .connection()
7035            .query_row("SELECT COUNT(*) FROM chunks WHERE source_revision != ''", [], |row| {
7036                row.get(0)
7037            })
7038            .unwrap()
7039    }
7040
7041    fn first_chunk_id(db: &IndexDatabase) -> i64 {
7042        db.storage
7043            .connection()
7044            .query_row("SELECT id FROM chunks ORDER BY id LIMIT 1", [], |row| row.get(0))
7045            .unwrap()
7046    }
7047
7048    fn run_git(root: &Path, args: &[&str]) {
7049        let output = Command::new("git").args(args).current_dir(root).output().unwrap();
7050        assert!(
7051            output.status.success(),
7052            "git {:?} failed\nstdout:\n{}\nstderr:\n{}",
7053            args,
7054            String::from_utf8_lossy(&output.stdout),
7055            String::from_utf8_lossy(&output.stderr)
7056        );
7057    }
7058
    /// Stateless test double; its `github::GitHubClient` implementation
    /// returns fixed, hard-coded responses.
    struct MockGitHubClient;
7060
7061    impl github::GitHubClient for MockGitHubClient {
7062        fn issue(
7063            &self,
7064            owner: &str,
7065            repo: &str,
7066            number: i64,
7067        ) -> anyhow::Result<github::GitHubIssue> {
7068            Ok(github::GitHubIssue {
7069                owner: owner.to_string(),
7070                repo: repo.to_string(),
7071                number,
7072                html_url: format!("https://github.com/{owner}/{repo}/issues/{number}"),
7073                state: "open".to_string(),
7074                title: "Decision: keep sqlite".to_string(),
7075                body: "We decided sqlite is required for binary size.".to_string(),
7076                author: Some("octo".to_string()),
7077                created_at: Some("2026-01-01T00:00:00Z".to_string()),
7078                updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7079                is_pull_request: true,
7080            })
7081        }
7082
7083        fn issue_comments(
7084            &self,
7085            owner: &str,
7086            repo: &str,
7087            number: i64,
7088        ) -> anyhow::Result<Vec<github::GitHubComment>> {
7089            Ok(vec![github::GitHubComment {
7090                id: 4201,
7091                owner: owner.to_string(),
7092                repo: repo.to_string(),
7093                number,
7094                html_url: format!("https://github.com/{owner}/{repo}/issues/{number}#comment-1"),
7095                body: "Rejected alternative: duckdb was too large.".to_string(),
7096                author: Some("octo".to_string()),
7097                created_at: Some("2026-01-01T01:00:00Z".to_string()),
7098                updated_at: Some("2026-01-01T01:00:00Z".to_string()),
7099            }])
7100        }
7101
7102        fn pull(
7103            &self,
7104            owner: &str,
7105            repo: &str,
7106            number: i64,
7107        ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7108            Ok(Some(github::GitHubPullRequest {
7109                owner: owner.to_string(),
7110                repo: repo.to_string(),
7111                number,
7112                html_url: format!("https://github.com/{owner}/{repo}/pull/{number}"),
7113                state: "open".to_string(),
7114                title: "Use sqlite".to_string(),
7115                body: "Constraint: normal queries must use cache only.".to_string(),
7116                author: Some("octo".to_string()),
7117                created_at: Some("2026-01-01T00:00:00Z".to_string()),
7118                updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7119                merged_at: None,
7120            }))
7121        }
7122
7123        fn pull_reviews(
7124            &self,
7125            owner: &str,
7126            repo: &str,
7127            number: i64,
7128        ) -> anyhow::Result<Vec<github::GitHubReview>> {
7129            Ok(vec![github::GitHubReview {
7130                id: 4202,
7131                owner: owner.to_string(),
7132                repo: repo.to_string(),
7133                number,
7134                html_url: Some(format!("https://github.com/{owner}/{repo}/pull/{number}#review")),
7135                state: "COMMENTED".to_string(),
7136                body: "Risk: live crawling during search would be surprising.".to_string(),
7137                author: Some("reviewer".to_string()),
7138                submitted_at: Some("2026-01-01T02:00:00Z".to_string()),
7139            }])
7140        }
7141
7142        fn pull_review_comments(
7143            &self,
7144            owner: &str,
7145            repo: &str,
7146            number: i64,
7147        ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7148            Ok(vec![github::GitHubReviewComment {
7149                id: 4203,
7150                owner: owner.to_string(),
7151                repo: repo.to_string(),
7152                number,
7153                path: Some("docs/search.md".to_string()),
7154                html_url: format!("https://github.com/{owner}/{repo}/pull/{number}#discussion"),
7155                body: "No longer use obsolete duckdb rationale.".to_string(),
7156                author: Some("reviewer".to_string()),
7157                created_at: Some("2026-01-01T03:00:00Z".to_string()),
7158                updated_at: Some("2026-01-01T03:00:00Z".to_string()),
7159            }])
7160        }
7161    }
7162
7163    struct PartiallyFailingGitHubClient;
7164
7165    impl github::GitHubClient for PartiallyFailingGitHubClient {
7166        fn issue(
7167            &self,
7168            owner: &str,
7169            repo: &str,
7170            number: i64,
7171        ) -> anyhow::Result<github::GitHubIssue> {
7172            if number == 404 {
7173                anyhow::bail!("gh: Not Found (HTTP 404)");
7174            }
7175            MockGitHubClient.issue(owner, repo, number)
7176        }
7177
7178        fn issue_comments(
7179            &self,
7180            owner: &str,
7181            repo: &str,
7182            number: i64,
7183        ) -> anyhow::Result<Vec<github::GitHubComment>> {
7184            MockGitHubClient.issue_comments(owner, repo, number)
7185        }
7186
7187        fn pull(
7188            &self,
7189            owner: &str,
7190            repo: &str,
7191            number: i64,
7192        ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7193            MockGitHubClient.pull(owner, repo, number)
7194        }
7195
7196        fn pull_reviews(
7197            &self,
7198            owner: &str,
7199            repo: &str,
7200            number: i64,
7201        ) -> anyhow::Result<Vec<github::GitHubReview>> {
7202            MockGitHubClient.pull_reviews(owner, repo, number)
7203        }
7204
7205        fn pull_review_comments(
7206            &self,
7207            owner: &str,
7208            repo: &str,
7209            number: i64,
7210        ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7211            MockGitHubClient.pull_review_comments(owner, repo, number)
7212        }
7213    }
7214}