Skip to main content

rag_rat_core/index/
mod.rs

1pub mod ai;
2pub mod anchors;
3pub mod chunker;
4pub mod edges;
5pub mod git_history;
6pub mod github;
7pub mod parser;
8pub mod schema;
9pub mod symbols;
10pub mod walker;
11
12#[cfg(test)]
13mod anchor_tests;
14#[cfg(test)]
15mod parser_tests;
16
17use std::{
18    collections::{BTreeMap, BTreeSet},
19    fs,
20    path::{Path, PathBuf},
21    process::Command,
22    sync::{
23        atomic::{AtomicUsize, Ordering},
24        mpsc,
25    },
26    thread,
27    thread::JoinHandle,
28    time::{SystemTime, UNIX_EPOCH},
29};
30
31use gix::{
32    bstr::{BString, ByteSlice},
33    status::{UntrackedFiles, tree_index},
34};
35use rayon::prelude::*;
36use regex::Regex;
37use rusqlite::{OptionalExtension, params};
38use serde::Serialize;
39use sha2::{Digest, Sha256};
40use thiserror::Error;
41
42use crate::{
43    config::{Config, TargetKind},
44    index::{
45        ai::{LocalAiStatus, ModelInfo, ReconcilePlan, ReconcileReport},
46        anchors::{AnchorStatus, ChunkAnchor},
47        chunker::Chunk,
48        git_history::{
49            ChunkBlameSummary, CommitSearchHit, GitHistoryIndexStatus, PathHistoryItem,
50            QueryCommitHit, SymbolHistoryItem,
51        },
52        github::{GitHubEvidence, GitHubStatus, GitHubSyncReport, Papertrail},
53        symbols::Symbol,
54    },
55    language::Language,
56    query::graph_meta::{self, GraphMetaMode},
57    search::lexical::{SearchHit, SearchOptions},
58    storage::IndexConnection,
59    storage::StorageStatus,
60};
61
/// Handle to an index database: a storage connection plus the git context
/// (commit and worktree) currently selected for it.
#[derive(Debug)]
pub struct IndexDatabase {
    /// Underlying storage connection all queries and writes go through.
    storage: IndexConnection,
    /// Commit SHA of the active context; empty until `set_context` assigns it.
    pub active_commit_sha: String,
    /// Worktree id of the active context; empty until `set_context` assigns it.
    pub active_worktree_id: String,
}
68
/// Progress events passed to the caller-supplied callback while indexing.
#[derive(Debug, Clone)]
pub enum IndexProgress {
    /// An indexing run began against `database` in the given `mode`.
    Started {
        database: PathBuf,
        mode: IndexMode,
    },
    /// File discovery is about to start.
    Discovering,
    /// Discovery finished; `files` is the number of files selected for indexing.
    Discovered {
        files: usize,
    },
    /// A file is being prepared.
    /// NOTE(review): emitted outside this section (presumably by
    /// `prepare_files_with_progress`) — confirm.
    PreparingFile {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    },
    /// A prepared file is about to be written to the database (`current` is 1-based).
    IndexingFile {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    },
    /// Emitted just before git history preparation is spawned.
    IndexingGitHistory,
    /// Emitted just before logical symbols are rebuilt.
    RebuildingLogicalSymbols,
    /// Emitted just before graph edges are resolved.
    ResolvingGraph,
    /// Emitted just before the FTS index is synced (incremental runs).
    SyncingFts,
    /// Emitted just before the FTS index is rebuilt (full rebuilds).
    RebuildingFts,
    /// The run committed; `files` is the count reported by the indexing step.
    Finished {
        files: usize,
    },
}
102
/// Which indexing strategy a run uses; serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum IndexMode {
    /// Re-index only the paths git reports as changed.
    Changed,
    /// Re-index according to a discovery plan computed against the database.
    Discover,
    /// Clear and rebuild the index for the active context.
    Full,
}
110
111impl IndexMode {
112    pub fn label(self) -> &'static str {
113        match self {
114            Self::Changed => "changed files",
115            Self::Discover => "discovery",
116            Self::Full => "full rebuild",
117        }
118    }
119}
120
/// Snapshot of the index state as assembled by `IndexDatabase::status`.
#[derive(Debug, Serialize)]
pub struct IndexStatus {
    /// Display form of the database path the status was requested for.
    pub database: String,
    /// Whether that path existed on disk when the status was built.
    pub exists: bool,
    /// Schema status of the connected database.
    pub schema: schema::SchemaStatus,
    /// Value of the `git_commit` meta key, if recorded.
    pub git_commit: Option<String>,
    /// `true` when the `git_dirty` meta key holds the string `"true"`; `None` if unset.
    pub git_dirty: Option<bool>,
    /// `indexed_at_ms` meta key parsed as `i64`; `None` if unset or unparsable.
    pub indexed_at_ms: Option<i64>,
    /// Current content revision of the index.
    pub content_revision: String,
    /// `fts_synced_at_ms` meta key parsed as `i64`; `None` if unset or unparsable.
    pub fts_synced_at_ms: Option<i64>,
    /// Value of the `fts_source_revision` meta key, if recorded.
    pub fts_source_revision: Option<String>,
    /// Whether the FTS index is flagged dirty.
    pub fts_dirty: bool,
    /// `true` when FTS is not dirty and its source revision equals `content_revision`.
    pub fts_fresh: bool,
    /// Number of rows in `files` per language.
    pub file_count_by_language: BTreeMap<String, u64>,
    /// Number of recorded parser failures.
    pub parser_failures: u64,
    /// Recorded parser failures with their paths.
    pub parser_failure_paths: Vec<ParserFailure>,
    /// Status of the git history index.
    pub git_history: GitHistoryIndexStatus,
    /// Status of the GitHub integration data.
    pub github: GitHubStatus,
    /// Status of the local AI components.
    pub local_ai: LocalAiStatus,
}
141
/// Serializable summary of an index heal pass.
///
/// NOTE(review): the code that fills this report is outside this section; the
/// field meanings below are read from their names only — confirm against the producer.
#[derive(Debug, Serialize)]
pub struct HealIndexReport {
    // Presumably: files examined by the heal pass.
    pub checked_files: u64,
    // Presumably: files that were re-indexed.
    pub healed_files: u64,
    // Presumably: files dropped from the index.
    pub removed_files: u64,
    // Presumably: files left untouched.
    pub skipped_files: u64,
    // Presumably: whether FTS was fresh after the pass.
    pub fts_fresh: bool,
    // Optional free-form note for the caller.
    pub message: Option<String>,
}
151
/// One recorded parser failure, as reported in `IndexStatus::parser_failure_paths`.
#[derive(Debug, Serialize)]
pub struct ParserFailure {
    /// Path of the file that failed to parse.
    pub path: String,
    /// Language the file was parsed as.
    pub language: String,
    /// Failure message.
    pub message: String,
}
158
/// Summary of a discovery plan, as built by `IndexDatabase::discovery_status`.
#[derive(Debug, Serialize)]
pub struct DiscoveryStatus {
    /// Number of files the discovery plan found.
    pub discovered_files: usize,
    /// Number of files the plan reports as already indexed.
    pub indexed_files: usize,
    /// Number of discovered files that are not indexed.
    pub unindexed_files: usize,
    /// Subset of `unindexed_files` whose kind is `TargetKind::Source`.
    pub unindexed_source_files: usize,
    /// Number of entries in the plan's `changed` set.
    pub changed_indexed_files: usize,
    /// Number of entries in the plan's `deleted` set.
    pub removed_indexed_files: usize,
    /// Up to ten unindexed paths, for display.
    pub unindexed_sample: Vec<String>,
    /// Set when at least one unindexed source file was found.
    pub warning: Option<String>,
}
170
/// Upper bound on the number of files healed automatically in one call.
/// NOTE(review): usage is outside this section; presumably the `cap` reported by
/// `IndexError::NeedsReindex` — confirm.
const MAX_AUTO_HEAL_FILES_PER_CALL: usize = 4;
/// Version tag of the graph index.
/// NOTE(review): presumably what `ensure_graph_index_current` /
/// `mark_graph_index_current` compare and store — confirm.
const GRAPH_INDEX_VERSION: &str = "6";
173
/// Typed index errors; each variant's display text starts with a stable tag
/// (`Gone`, `StaleChunk`, `needs_reindex`).
#[derive(Debug, Error)]
pub enum IndexError {
    /// The requested chunk id is no longer present in the index.
    #[error("Gone: indexed chunk {chunk_id} no longer exists")]
    Gone { chunk_id: i64 },
    /// The chunk's file was reindexed but the chunk could not be relocated.
    #[error("StaleChunk: chunk {chunk_id} in {path} could not be relocated after reindex")]
    StaleChunk { chunk_id: i64, path: String },
    /// More files are stale than the automatic heal cap allows.
    #[error("needs_reindex: {stale_files} stale files exceeds automatic heal cap {cap}")]
    NeedsReindex { stale_files: usize, cap: usize },
}
183
184impl IndexDatabase {
185    pub fn open(path: &Path) -> anyhow::Result<Self> {
186        Self::open_with_graph_check(path, true)
187    }
188
189    pub fn database_path(&self) -> &Path {
190        self.storage.database_path()
191    }
192
193    fn open_with_graph_check(path: &Path, check_graph: bool) -> anyhow::Result<Self> {
194        let mut storage = IndexConnection::open(path)?;
195        schema::check_compatible(storage.connection())?;
196        ai::ensure_model_manifest(storage.connection())?;
197        if let Some(root) = meta_for(storage.connection(), "source_root")? {
198            storage.set_source_root(PathBuf::from(root));
199        }
200        let db =
201            Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() };
202        if check_graph {
203            db.ensure_graph_index_current()?;
204        }
205        Ok(db)
206    }
207
208    pub fn open_config(config: &Config) -> anyhow::Result<Self> {
209        let mut db = Self::open_with_graph_check(&config.database, false)?;
210        db.storage.set_source_root(config.root.clone());
211        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
212        db.set_context(&commit_sha, &worktree_id)?;
213        db.ensure_graph_index_current()?;
214        Ok(db)
215    }
216
217    pub fn migrate(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
218        Self::migrate_with_fastembed_cache(path, None)
219    }
220
221    fn migrate_with_fastembed_cache(
222        path: &Path,
223        fastembed_cache_dir: Option<&Path>,
224    ) -> anyhow::Result<schema::SchemaStatus> {
225        let storage = IndexConnection::open(path)?;
226        let status = schema::status(storage.connection())?;
227        match status.state {
228            schema::SchemaState::Newer | schema::SchemaState::Dirty => {
229                anyhow::bail!("{}", status.message);
230            },
231            schema::SchemaState::Compatible => {},
232            schema::SchemaState::Missing | schema::SchemaState::Older => {
233                schema::apply(storage.connection())?;
234            },
235        }
236        ai::ensure_model_manifest(storage.connection())?;
237        if let Some(fastembed_cache_dir) = fastembed_cache_dir {
238            ai::recover_cached_fastembed_model_from(storage.connection(), fastembed_cache_dir)?;
239        } else {
240            ai::recover_cached_fastembed_model(storage.connection())?;
241        }
242        schema::status(storage.connection())
243    }
244
245    pub fn migration_check(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
246        let storage = IndexConnection::open(path)?;
247        schema::status(storage.connection())
248    }
249
250    fn create_or_migrate(path: &Path) -> anyhow::Result<Self> {
251        let mut storage = IndexConnection::open(path)?;
252        schema::apply(storage.connection())?;
253        ai::ensure_model_manifest(storage.connection())?;
254        if let Some(root) = meta_for(storage.connection(), "source_root")? {
255            storage.set_source_root(PathBuf::from(root));
256        }
257        Ok(Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() })
258    }
259
260    pub fn set_context(&mut self, commit_sha: &str, worktree_id: &str) -> anyhow::Result<()> {
261        self.active_commit_sha = commit_sha.to_string();
262        self.active_worktree_id = worktree_id.to_string();
263
264        let conn = self.storage.connection();
265        conn.execute_batch(
266            "
267            CREATE TEMP TABLE IF NOT EXISTS connection_context(key TEXT PRIMARY KEY, value TEXT);
268        ",
269        )?;
270
271        let mut stmt = conn.prepare(
272            "INSERT OR REPLACE INTO temp.connection_context(key, value) VALUES (?1, ?2)",
273        )?;
274        stmt.execute(params!["commit_sha", commit_sha])?;
275        stmt.execute(params!["worktree_id", worktree_id])?;
276
277        conn.execute_batch("
278            DROP VIEW IF EXISTS temp.files;
279            CREATE TEMP VIEW temp.files AS
280            SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
281            FROM main.files
282            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id') AND worktree_id != '' AND kind != 'deleted'
283            UNION ALL
284            SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
285            FROM main.files
286            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
287              AND commit_sha != ''
288              AND path NOT IN (
289                  SELECT path FROM main.files 
290                  WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
291                    AND worktree_id != ''
292              );
293        ")?;
294
295        Ok(())
296    }
297
298    pub fn rebuild(config: &Config) -> anyhow::Result<Self> {
299        Self::rebuild_with_progress(config, |_| {})
300    }
301
302    pub fn rebuild_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
303    where
304        F: FnMut(IndexProgress),
305    {
306        progress(IndexProgress::Started {
307            database: config.database.clone(),
308            mode: IndexMode::Full,
309        });
310        let mut db = Self::create_or_migrate(&config.database)?;
311        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
312        db.set_context(&commit_sha, &worktree_id)?;
313        progress(IndexProgress::IndexingGitHistory);
314        let mut git_history = Some(spawn_git_history_prepare(&config.root));
315        let result = (|| -> anyhow::Result<()> {
316            db.storage.execute_batch("BEGIN TRANSACTION")?;
317            db.clear_full_rebuild_tables()?;
318            db.set_meta("source_root", &config.root.display().to_string())?;
319            db.storage.set_source_root(config.root.clone());
320            db.write_git_meta(&config.root)?;
321            let indexed = db.index_targets_with_progress(config, &mut progress)?;
322            db.apply_prepared_git_history(
323                &config.root,
324                git_history
325                    .take()
326                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
327            )?;
328            progress(IndexProgress::RebuildingLogicalSymbols);
329            db.rebuild_logical_symbols()?;
330            progress(IndexProgress::ResolvingGraph);
331            db.resolve_edges()?;
332            db.mark_graph_index_current()?;
333            progress(IndexProgress::RebuildingFts);
334            db.rebuild_fts()?;
335            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
336            db.storage.execute_batch("COMMIT")?;
337            progress(IndexProgress::Finished { files: indexed });
338            Ok(())
339        })();
340        if result.is_err() {
341            if let Some(handle) = git_history.take() {
342                let _ = join_git_history_prepare(handle);
343            }
344            let _ = db.storage.execute_batch("ROLLBACK");
345        }
346        result?;
347        Ok(db)
348    }
349
    /// Deletes everything indexed for the active context ahead of a full rebuild.
    ///
    /// The batch first collects, into `temp.full_rebuild_file_ids`, the ids of files
    /// belonging to the active worktree plus files of the active commit whose path has
    /// no row in that worktree (both read from `temp.connection_context`). It then:
    ///
    /// * marks edges that point at symbols of those files as `unresolved`,
    /// * deletes edges originating from those files or their symbols,
    /// * deletes logical-symbol members for those symbols and any logical symbols left
    ///   without members,
    /// * deletes symbol facts, FTS rows, summaries, embeddings, blame rows, docs and
    ///   parser failures tied to those files,
    /// * deletes the symbols, chunks and file rows themselves,
    /// * and finally empties the temporary id table.
    ///
    /// The statements run in the order written; dependent rows are removed before the
    /// `symbols`, `chunks` and `files` rows they are looked up through.
    fn clear_full_rebuild_tables(&self) -> anyhow::Result<()> {
        self.storage.execute_batch(
            "
            CREATE TEMP TABLE IF NOT EXISTS full_rebuild_file_ids(id INTEGER PRIMARY KEY);
            DELETE FROM temp.full_rebuild_file_ids;
            INSERT OR IGNORE INTO temp.full_rebuild_file_ids(id)
            SELECT id
            FROM main.files
            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
              AND worktree_id != '';
            INSERT OR IGNORE INTO temp.full_rebuild_file_ids(id)
            SELECT id
            FROM main.files
            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
              AND commit_sha != ''
              AND path NOT IN (
                  SELECT path FROM main.files
                  WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
                    AND worktree_id != ''
              );

            UPDATE main.edges
            SET to_symbol_id = NULL,
                target_start_line = NULL,
                target_end_line = NULL,
                resolution = 'unresolved'
            WHERE to_symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.edges
            WHERE source_file_id IN (SELECT id FROM temp.full_rebuild_file_ids)
               OR from_symbol_id IN (
                    SELECT symbols.id
                    FROM main.symbols
                    JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
               );

            DELETE FROM main.logical_symbol_members
            WHERE symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.logical_symbols
            WHERE id NOT IN (
                SELECT logical_symbol_id FROM main.logical_symbol_members
            );
            DELETE FROM main.symbol_facts
            WHERE symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.chunk_fts
            WHERE rowid IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.chunk_summaries
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.chunk_embeddings
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.git_chunk_blame
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.docs
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.parser_failures
            WHERE path IN (
                SELECT path
                FROM main.files
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = files.id
            );
            DELETE FROM main.symbols
            WHERE file_id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM main.chunks
            WHERE file_id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM main.files
            WHERE id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM temp.full_rebuild_file_ids;
            ",
        )?;
        Ok(())
    }
452
453    pub fn index_changed(config: &Config) -> anyhow::Result<Self> {
454        Self::index_changed_with_progress(config, |_| {})
455    }
456
457    pub fn index_changed_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
458    where
459        F: FnMut(IndexProgress),
460    {
461        Self::index_incremental_with_progress(config, IndexMode::Changed, &mut progress)
462    }
463
464    pub fn index_discover(config: &Config) -> anyhow::Result<Self> {
465        Self::index_discover_with_progress(config, |_| {})
466    }
467
468    pub fn index_discover_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
469    where
470        F: FnMut(IndexProgress),
471    {
472        Self::index_incremental_with_progress(config, IndexMode::Discover, &mut progress)
473    }
474
475    fn index_incremental_with_progress<F>(
476        config: &Config,
477        mode: IndexMode,
478        progress: &mut F,
479    ) -> anyhow::Result<Self>
480    where
481        F: FnMut(IndexProgress),
482    {
483        if !config.database.exists() {
484            return Self::rebuild_with_progress(config, progress);
485        }
486        if Self::migration_check(&config.database)?.state == schema::SchemaState::Missing {
487            return Self::rebuild_with_progress(config, progress);
488        }
489
490        let mut db = Self::open(&config.database)?;
491        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
492        db.set_context(&commit_sha, &worktree_id)?;
493        if db.indexed_file_count()? == 0 {
494            return Self::rebuild_with_progress(config, progress);
495        }
496        progress(IndexProgress::Started { database: config.database.clone(), mode });
497        progress(IndexProgress::IndexingGitHistory);
498        let mut git_history = Some(spawn_git_history_prepare(&config.root));
499        let result = (|| -> anyhow::Result<()> {
500            db.storage.execute_batch("BEGIN TRANSACTION")?;
501            db.set_meta("source_root", &config.root.display().to_string())?;
502            db.storage.set_source_root(config.root.clone());
503            db.write_git_meta(&config.root)?;
504            let indexed = match mode {
505                IndexMode::Changed => db.index_changed_files_with_progress(config, progress)?,
506                IndexMode::Discover => db.index_discovered_files_with_progress(config, progress)?,
507                IndexMode::Full => unreachable!("full mode is handled by rebuild_with_progress"),
508            };
509            db.apply_prepared_git_history(
510                &config.root,
511                git_history
512                    .take()
513                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
514            )?;
515            if indexed > 0 {
516                progress(IndexProgress::RebuildingLogicalSymbols);
517                db.rebuild_logical_symbols()?;
518                progress(IndexProgress::ResolvingGraph);
519                db.resolve_edges()?;
520                db.mark_graph_index_current()?;
521                progress(IndexProgress::SyncingFts);
522                db.sync_fts()?;
523            }
524            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
525            db.storage.execute_batch("COMMIT")?;
526            progress(IndexProgress::Finished { files: indexed });
527            Ok(())
528        })();
529        if result.is_err() {
530            if let Some(handle) = git_history.take() {
531                let _ = join_git_history_prepare(handle);
532            }
533            let _ = db.storage.execute_batch("ROLLBACK");
534        }
535        result?;
536        Ok(db)
537    }
538
539    pub fn index_targets(&self, config: &Config) -> anyhow::Result<()> {
540        self.index_targets_with_progress(config, &mut |_| {})?;
541        Ok(())
542    }
543
544    fn index_targets_with_progress<F>(
545        &self,
546        config: &Config,
547        progress: &mut F,
548    ) -> anyhow::Result<usize>
549    where
550        F: FnMut(IndexProgress),
551    {
552        progress(IndexProgress::Discovering);
553        let files = collect_index_files(config)?;
554        let changes = git_changed_paths(&config.root).unwrap_or_default();
555        let files = self.assign_file_scopes(files, &changes);
556        progress(IndexProgress::Discovered { files: files.len() });
557
558        let prepared = prepare_files_with_progress(&files, progress)?;
559        for (index, prepared_file) in prepared.iter().enumerate() {
560            let current = index + 1;
561            if should_report_file_progress(current, files.len()) {
562                progress(IndexProgress::IndexingFile {
563                    current,
564                    total: files.len(),
565                    path: prepared_file.file.relative_path.clone(),
566                    language: prepared_file.file.language,
567                    kind: prepared_file.file.kind,
568                });
569            }
570            self.insert_prepared_file(prepared_file)?;
571        }
572
573        Ok(files.len())
574    }
575
576    fn index_changed_files_with_progress<F>(
577        &self,
578        config: &Config,
579        progress: &mut F,
580    ) -> anyhow::Result<usize>
581    where
582        F: FnMut(IndexProgress),
583    {
584        progress(IndexProgress::Discovering);
585        let changes = git_changed_paths(&config.root)?;
586        let files = collect_changed_index_files(config, &changes)?;
587        let files = self.assign_file_scopes(files, &changes);
588        self.apply_incremental_file_plan(files, changes.deleted, progress)
589    }
590
591    fn index_discovered_files_with_progress<F>(
592        &self,
593        config: &Config,
594        progress: &mut F,
595    ) -> anyhow::Result<usize>
596    where
597        F: FnMut(IndexProgress),
598    {
599        progress(IndexProgress::Discovering);
600        let plan = discovery_plan(self.storage.connection(), config)?;
601        let changes = git_changed_paths(&config.root).unwrap_or_default();
602        let files = self.assign_file_scopes(plan.files, &changes);
603        self.apply_incremental_file_plan(files, plan.deleted, progress)
604    }
605
606    fn assign_file_scopes(
607        &self,
608        files: Vec<IndexFile>,
609        changes: &GitChangedPaths,
610    ) -> Vec<IndexFile> {
611        let has_base_commit = !self.active_commit_sha.is_empty();
612        files
613            .into_iter()
614            .map(|mut file| {
615                if !has_base_commit || changes.changed.contains(&file.relative_path) {
616                    file.commit_sha.clear();
617                    file.worktree_id.clone_from(&self.active_worktree_id);
618                } else {
619                    file.commit_sha.clone_from(&self.active_commit_sha);
620                    file.worktree_id.clear();
621                }
622                file
623            })
624            .collect()
625    }
626
627    fn apply_incremental_file_plan<F>(
628        &self,
629        files: Vec<IndexFile>,
630        deleted: BTreeSet<PathBuf>,
631        progress: &mut F,
632    ) -> anyhow::Result<usize>
633    where
634        F: FnMut(IndexProgress),
635    {
636        progress(IndexProgress::Discovered { files: files.len() });
637
638        let deleted_count = deleted.len();
639        for path in deleted {
640            self.mark_file_deleted(&path)?;
641        }
642
643        let prepared = prepare_files_with_progress(&files, progress)?;
644        for (index, prepared_file) in prepared.iter().enumerate() {
645            let current = index + 1;
646            if should_report_file_progress(current, files.len()) {
647                progress(IndexProgress::IndexingFile {
648                    current,
649                    total: files.len(),
650                    path: prepared_file.file.relative_path.clone(),
651                    language: prepared_file.file.language,
652                    kind: prepared_file.file.kind,
653                });
654            }
655            self.remove_file_in_scope(
656                &prepared_file.file.relative_path,
657                &prepared_file.file.commit_sha,
658                &prepared_file.file.worktree_id,
659            )?;
660            self.insert_prepared_file(prepared_file)?;
661        }
662
663        Ok(files.len() + deleted_count)
664    }
665
666    pub fn status(&self, database: &Path) -> anyhow::Result<IndexStatus> {
667        let mut counts = BTreeMap::new();
668        let mut stmt = self
669            .storage
670            .connection()
671            .prepare("SELECT language, COUNT(*) FROM files GROUP BY language ORDER BY language")?;
672        let rows =
673            stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)))?;
674        for row in rows {
675            let (language, count) = row?;
676            counts.insert(language, u64::try_from(count).unwrap_or(0));
677        }
678
679        let content_revision = self.content_revision()?;
680        let fts_source_revision = self.meta("fts_source_revision")?;
681        let fts_dirty = self.fts_dirty()?;
682
683        Ok(IndexStatus {
684            database: database.display().to_string(),
685            exists: database.exists(),
686            schema: schema::status(self.storage.connection())?,
687            git_commit: self.meta("git_commit")?,
688            git_dirty: self.meta("git_dirty")?.map(|value| value == "true"),
689            indexed_at_ms: self.meta("indexed_at_ms")?.and_then(|value| value.parse::<i64>().ok()),
690            content_revision: content_revision.clone(),
691            fts_synced_at_ms: self
692                .meta("fts_synced_at_ms")?
693                .and_then(|value| value.parse::<i64>().ok()),
694            fts_dirty,
695            fts_fresh: !fts_dirty
696                && fts_source_revision.as_deref() == Some(content_revision.as_str()),
697            fts_source_revision,
698            file_count_by_language: counts,
699            parser_failures: self.parser_failure_count()?,
700            parser_failure_paths: self.parser_failure_paths()?,
701            git_history: self.git_history_status()?,
702            github: self.github_status()?,
703            local_ai: self.local_ai_status()?,
704        })
705    }
706
707    pub fn storage_status(&self) -> anyhow::Result<StorageStatus> {
708        self.storage.status()
709    }
710
711    pub fn discovery_status(&self, config: &Config) -> anyhow::Result<DiscoveryStatus> {
712        let plan = discovery_plan(self.storage.connection(), config)?;
713        let unindexed_source_files =
714            plan.unindexed.iter().filter(|file| file.kind == TargetKind::Source).count();
715        let unindexed_sample =
716            plan.unindexed.iter().take(10).map(|file| path_string(&file.relative_path)).collect();
717        let warning = (unindexed_source_files > 0).then(|| {
718            format!(
719                "{unindexed_source_files} unindexed source files detected. Run `rag-rat index --full` or `rag-rat index --discover`."
720            )
721        });
722        Ok(DiscoveryStatus {
723            discovered_files: plan.discovered_files,
724            indexed_files: plan.indexed_files,
725            unindexed_files: plan.unindexed.len(),
726            unindexed_source_files,
727            changed_indexed_files: plan.changed.len(),
728            removed_indexed_files: plan.deleted.len(),
729            unindexed_sample,
730            warning,
731        })
732    }
733
734    pub fn search(
735        &self,
736        query: &str,
737        limit: u32,
738        include_generated: bool,
739    ) -> anyhow::Result<Vec<SearchHit>> {
740        self.search_with_graph_meta(query, limit, include_generated, GraphMetaMode::Compact, 3)
741    }
742
743    pub fn search_explain(
744        &self,
745        query: &str,
746        limit: u32,
747        include_generated: bool,
748    ) -> anyhow::Result<Vec<SearchHit>> {
749        self.search_explain_with_graph_meta(
750            query,
751            limit,
752            include_generated,
753            GraphMetaMode::Compact,
754            3,
755        )
756    }
757
758    pub fn search_with_graph_meta(
759        &self,
760        query: &str,
761        limit: u32,
762        include_generated: bool,
763        graph_mode: GraphMetaMode,
764        graph_limit: u32,
765    ) -> anyhow::Result<Vec<SearchHit>> {
766        self.search_with_graph_meta_options(
767            query,
768            limit,
769            include_generated,
770            graph_mode,
771            graph_limit,
772            SearchOptions::default(),
773        )
774    }
775
776    pub fn search_with_graph_meta_options(
777        &self,
778        query: &str,
779        limit: u32,
780        include_generated: bool,
781        graph_mode: GraphMetaMode,
782        graph_limit: u32,
783        options: SearchOptions,
784    ) -> anyhow::Result<Vec<SearchHit>> {
785        self.ensure_fts_fresh()?;
786        let mut hits =
787            self.search_with_heal(query, limit, include_generated, true, false, options)?;
788        graph_meta::attach_to_search_hits(
789            self.storage.connection(),
790            &mut hits,
791            graph_mode,
792            graph_limit,
793        )?;
794        Ok(hits)
795    }
796
797    pub fn search_explain_with_graph_meta(
798        &self,
799        query: &str,
800        limit: u32,
801        include_generated: bool,
802        graph_mode: GraphMetaMode,
803        graph_limit: u32,
804    ) -> anyhow::Result<Vec<SearchHit>> {
805        self.search_explain_with_graph_meta_options(
806            query,
807            limit,
808            include_generated,
809            graph_mode,
810            graph_limit,
811            SearchOptions::default(),
812        )
813    }
814
815    pub fn search_explain_with_graph_meta_options(
816        &self,
817        query: &str,
818        limit: u32,
819        include_generated: bool,
820        graph_mode: GraphMetaMode,
821        graph_limit: u32,
822        options: SearchOptions,
823    ) -> anyhow::Result<Vec<SearchHit>> {
824        self.ensure_fts_fresh()?;
825        let mut hits =
826            self.search_with_heal(query, limit, include_generated, true, true, options)?;
827        graph_meta::attach_to_search_hits(
828            self.storage.connection(),
829            &mut hits,
830            graph_mode,
831            graph_limit,
832        )?;
833        Ok(hits)
834    }
835
836    pub fn symbols(
837        &self,
838        name: &str,
839        language: Option<Language>,
840        limit: u32,
841    ) -> anyhow::Result<Vec<crate::query::symbol::SymbolHit>> {
842        crate::query::symbol::lookup(self.storage.connection(), name, language, limit)
843    }
844
845    pub fn symbol_candidates(
846        &self,
847        selector: &crate::query::symbol::SymbolSelector,
848    ) -> anyhow::Result<crate::query::symbol::SymbolLookup> {
849        crate::query::symbol::lookup_candidates(self.storage.connection(), selector)
850    }
851
852    pub fn select_symbol(
853        &self,
854        selector: &crate::query::symbol::SymbolSelector,
855    ) -> anyhow::Result<
856        Result<Option<crate::query::symbol::SymbolHit>, crate::query::symbol::SymbolDisambiguation>,
857    > {
858        crate::query::symbol::select_one(self.storage.connection(), selector)
859    }
860
861    pub fn read_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
862        self.read_chunk_with_graph_and_memories(chunk_id, GraphMetaMode::Full, 20, true)
863    }
864
865    pub fn read_chunk_with_graph(
866        &self,
867        chunk_id: i64,
868        graph_mode: GraphMetaMode,
869        graph_limit: u32,
870    ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
871        self.read_chunk_with_graph_and_memories(chunk_id, graph_mode, graph_limit, false)
872    }
873
874    pub fn read_chunk_with_graph_and_memories(
875        &self,
876        chunk_id: i64,
877        graph_mode: GraphMetaMode,
878        graph_limit: u32,
879        include_memories: bool,
880    ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
881        let Some(mut chunk) = self.read_chunk_current(chunk_id)? else {
882            return Ok(None);
883        };
884        graph_meta::attach_to_read_chunk(
885            self.storage.connection(),
886            &mut chunk,
887            graph_mode,
888            graph_limit,
889        )?;
890        if include_memories {
891            chunk.memories =
892                crate::query::memory::memories_for_chunk(self.storage.connection(), chunk_id, 20)?;
893        }
894        Ok(Some(chunk))
895    }
896
897    fn read_chunk_current(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
898        let Some(mut chunk) = crate::query::read_chunk(self.storage.connection(), chunk_id)? else {
899            return Ok(None);
900        };
901        let Some(root) = self.storage.source_root() else {
902            return Ok(Some(chunk));
903        };
904        let source_path = root.join(&chunk.path);
905        let current_text = match fs::read_to_string(&source_path) {
906            Ok(text) => text,
907            Err(_) => {
908                let path = chunk.path.clone();
909                self.mark_file_deleted(Path::new(&path))?;
910                self.sync_fts()?;
911                anyhow::bail!(IndexError::Gone { chunk_id });
912            },
913        };
914        let anchor = self.chunk_anchor(chunk_id)?;
915        let status = anchors::validate(
916            &chunk.text,
917            usize::try_from(chunk.start_line).unwrap_or(1),
918            usize::try_from(chunk.end_line).unwrap_or(1),
919            &anchor,
920            &current_text,
921        );
922        match status {
923            AnchorStatus::Exact => {
924                if let Some(text) = anchors::slice_lines(
925                    &current_text,
926                    usize::try_from(chunk.start_line).unwrap_or(1),
927                    usize::try_from(chunk.end_line).unwrap_or(1),
928                ) {
929                    chunk.text = text;
930                }
931                Ok(Some(chunk))
932            },
933            AnchorStatus::Relocated { start_line, end_line, text } => {
934                chunk.start_line = i64::try_from(start_line)?;
935                chunk.end_line = i64::try_from(end_line)?;
936                chunk.text = text;
937                Ok(Some(chunk))
938            },
939            AnchorStatus::Stale => {
940                self.heal_file(Path::new(&chunk.path))?;
941                self.sync_fts()?;
942                let healed = crate::query::read_chunk(self.storage.connection(), chunk_id)?;
943                match healed {
944                    Some(chunk) => Ok(Some(chunk)),
945                    None => anyhow::bail!(IndexError::StaleChunk { chunk_id, path: chunk.path }),
946                }
947            },
948        }
949    }
950
951    pub fn search_hash_baseline(
952        &self,
953        query: &str,
954        limit: u32,
955        include_generated: bool,
956    ) -> anyhow::Result<Vec<SearchHit>> {
957        self.ensure_fts_fresh()?;
958        crate::search::lexical::search_hash_baseline(
959            self.storage.connection(),
960            query,
961            limit,
962            include_generated,
963        )
964    }
965
966    pub fn docs_for_symbol(&self, symbol: &str, limit: u32) -> anyhow::Result<Vec<SearchHit>> {
967        self.search(symbol, limit, true)
968    }
969
970    pub fn docs_for_selected_symbol(
971        &self,
972        symbol: &crate::query::symbol::SymbolHit,
973        limit: u32,
974    ) -> anyhow::Result<Vec<SearchHit>> {
975        let mut hits = self.local_symbol_context_hits(symbol, limit)?;
976        hits.extend(self.search(&symbol.name, limit.saturating_mul(4).max(limit), true)?);
977        rank_docs_for_symbol(symbol, &mut hits);
978        dedupe_search_hits(&mut hits);
979        hits.truncate(usize::try_from(limit).unwrap_or(usize::MAX));
980        Ok(hits)
981    }
982
983    pub fn commit_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<CommitSearchHit>> {
984        git_history::commit_search(self.storage.connection(), query, limit)
985    }
986
987    pub fn git_history_for_path(
988        &self,
989        path: &str,
990        limit: u32,
991    ) -> anyhow::Result<Vec<PathHistoryItem>> {
992        git_history::history_for_path(self.storage.connection(), path, limit)
993    }
994
995    pub fn git_history_for_symbol(
996        &self,
997        symbol: &str,
998        language: Option<Language>,
999        limit: u32,
1000    ) -> anyhow::Result<Vec<SymbolHistoryItem>> {
1001        let symbols = self.symbols(symbol, language, limit)?;
1002        let per_symbol_limit = limit.max(1);
1003        let mut out = Vec::new();
1004        for symbol_hit in symbols {
1005            for commit in self.git_history_for_path(&symbol_hit.path, per_symbol_limit)? {
1006                out.push(SymbolHistoryItem {
1007                    symbol: symbol_hit.name.clone(),
1008                    qualified_name: symbol_hit.qualified_name.clone(),
1009                    path: symbol_hit.path.clone(),
1010                    start_byte: symbol_hit.start_byte,
1011                    end_byte: symbol_hit.end_byte,
1012                    commit,
1013                    evidence_kind: "historical",
1014                });
1015                if out.len() >= usize::try_from(limit).unwrap_or(usize::MAX) {
1016                    return Ok(out);
1017                }
1018            }
1019        }
1020        Ok(out)
1021    }
1022
1023    pub fn commits_touching_query(
1024        &self,
1025        query: &str,
1026        limit: u32,
1027    ) -> anyhow::Result<Vec<QueryCommitHit>> {
1028        let current_hits = self.search(query, limit, true)?;
1029        git_history::commits_touching_query(self.storage.connection(), query, limit, &current_hits)
1030    }
1031
1032    pub fn git_blame_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<ChunkBlameSummary>> {
1033        let Some(chunk) = self.read_chunk(chunk_id)? else {
1034            return Ok(None);
1035        };
1036        let source_text_hash = git_history::source_text_hash(&chunk.text);
1037        if let Some(cached) =
1038            git_history::cached_blame(self.storage.connection(), chunk_id, &source_text_hash)?
1039        {
1040            return Ok(Some(cached));
1041        }
1042        let Some(root) = self.storage.source_root() else {
1043            return Ok(Some(ChunkBlameSummary {
1044                chunk_id,
1045                path: chunk.path,
1046                start_line: chunk.start_line,
1047                end_line: chunk.end_line,
1048                source_text_hash,
1049                line_count: 0,
1050                dominant_commit: None,
1051                dominant_commit_lines: 0,
1052                newest_commit: None,
1053                newest_commit_time_s: None,
1054                oldest_commit: None,
1055                oldest_commit_time_s: None,
1056                commit_counts: BTreeMap::new(),
1057                evidence_kind: "historical",
1058            }));
1059        };
1060        let blame_lines =
1061            git_history::blame_lines(root, &chunk.path, chunk.start_line, chunk.end_line);
1062        let mut counts = BTreeMap::<String, i64>::new();
1063        let mut newest = None::<(String, i64)>;
1064        let mut oldest = None::<(String, i64)>;
1065        for line in &blame_lines {
1066            *counts.entry(line.commit.clone()).or_default() += 1;
1067            if let Some(time) = line.author_time_s {
1068                if newest.as_ref().is_none_or(|(_, newest_time)| time > *newest_time) {
1069                    newest = Some((line.commit.clone(), time));
1070                }
1071                if oldest.as_ref().is_none_or(|(_, oldest_time)| time < *oldest_time) {
1072                    oldest = Some((line.commit.clone(), time));
1073                }
1074            }
1075        }
1076        let dominant = counts
1077            .iter()
1078            .max_by_key(|(commit, count)| (*count, *commit))
1079            .map(|(commit, count)| (commit.clone(), *count));
1080        let summary = ChunkBlameSummary {
1081            chunk_id,
1082            path: chunk.path,
1083            start_line: chunk.start_line,
1084            end_line: chunk.end_line,
1085            source_text_hash,
1086            line_count: i64::try_from(blame_lines.len()).unwrap_or(i64::MAX),
1087            dominant_commit: dominant.as_ref().map(|(commit, _)| commit.clone()),
1088            dominant_commit_lines: dominant.map(|(_, count)| count).unwrap_or(0),
1089            newest_commit: newest.as_ref().map(|(commit, _)| commit.clone()),
1090            newest_commit_time_s: newest.as_ref().map(|(_, time)| *time),
1091            oldest_commit: oldest.as_ref().map(|(commit, _)| commit.clone()),
1092            oldest_commit_time_s: oldest.as_ref().map(|(_, time)| *time),
1093            commit_counts: counts,
1094            evidence_kind: "historical",
1095        };
1096        git_history::store_blame(self.storage.connection(), &summary)?;
1097        Ok(Some(summary))
1098    }
1099
1100    pub fn github_sync_from_refs(&self, offline: bool) -> anyhow::Result<GitHubSyncReport> {
1101        self.github_sync_from_refs_with_progress(offline, |_| {})
1102    }
1103
1104    pub fn github_sync_from_refs_with_progress(
1105        &self,
1106        offline: bool,
1107        progress: impl FnMut(github::GitHubSyncProgress),
1108    ) -> anyhow::Result<GitHubSyncReport> {
1109        let Some(root) = self.storage.source_root() else {
1110            anyhow::bail!("index has no source_root metadata; rebuild required");
1111        };
1112        if offline {
1113            github::sync_from_refs::<github::GhCliGitHubClient>(
1114                self.storage.connection(),
1115                root,
1116                None,
1117                true,
1118            )
1119        } else {
1120            let client = github::GhCliGitHubClient;
1121            github::sync_from_refs_with_progress(
1122                self.storage.connection(),
1123                root,
1124                Some(&client),
1125                false,
1126                progress,
1127            )
1128        }
1129    }
1130
1131    pub fn github_sync_issue(
1132        &self,
1133        issue_ref: &str,
1134        offline: bool,
1135    ) -> anyhow::Result<GitHubSyncReport> {
1136        if offline {
1137            github::sync_issue::<github::GhCliGitHubClient>(
1138                self.storage.connection(),
1139                issue_ref,
1140                None,
1141                true,
1142            )
1143        } else {
1144            let client = github::GhCliGitHubClient;
1145            github::sync_issue(self.storage.connection(), issue_ref, Some(&client), false)
1146        }
1147    }
1148
1149    pub fn github_issue_search(
1150        &self,
1151        query: &str,
1152        limit: u32,
1153    ) -> anyhow::Result<Vec<GitHubEvidence>> {
1154        github::issue_search(self.storage.connection(), query, limit)
1155    }
1156
1157    pub fn rationale_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<GitHubEvidence>> {
1158        github::rationale_search(self.storage.connection(), query, limit)
1159    }
1160
1161    pub fn github_refs_for_path(
1162        &self,
1163        path: &str,
1164        limit: u32,
1165    ) -> anyhow::Result<Vec<github::GitHubRef>> {
1166        github::refs_for_path(self.storage.connection(), path, limit)
1167    }
1168
1169    pub fn github_sync_status(&self) -> anyhow::Result<GitHubStatus> {
1170        self.github_status()
1171    }
1172
1173    pub fn papertrail_for_chunk(
1174        &self,
1175        chunk_id: i64,
1176        limit: u32,
1177    ) -> anyhow::Result<Option<Papertrail>> {
1178        let Some(chunk) = self.read_chunk(chunk_id)? else {
1179            return Ok(None);
1180        };
1181        Ok(Some(github::papertrail_for_chunk(self.storage.connection(), &chunk, limit)?))
1182    }
1183
1184    pub fn papertrail_for_symbol(
1185        &self,
1186        symbol: &str,
1187        language: Option<Language>,
1188        limit: u32,
1189    ) -> anyhow::Result<Option<Papertrail>> {
1190        let Some(symbol) = self.symbols(symbol, language, limit)?.into_iter().next() else {
1191            return Ok(None);
1192        };
1193        Ok(Some(github::papertrail_for_symbol(self.storage.connection(), &symbol, limit)?))
1194    }
1195
1196    pub fn papertrail_for_selected_symbol(
1197        &self,
1198        symbol: &crate::query::symbol::SymbolHit,
1199        limit: u32,
1200    ) -> anyhow::Result<Papertrail> {
1201        github::papertrail_for_symbol(self.storage.connection(), symbol, limit)
1202    }
1203
1204    pub fn papertrail_for_commit(
1205        &self,
1206        commit_hash: &str,
1207        limit: u32,
1208    ) -> anyhow::Result<Papertrail> {
1209        github::papertrail_for_commit(self.storage.connection(), commit_hash, limit)
1210    }
1211
1212    pub fn local_ai_status(&self) -> anyhow::Result<LocalAiStatus> {
1213        ai::status(self.storage.connection())
1214    }
1215
1216    pub fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
1217        ai::models(self.storage.connection())
1218    }
1219
1220    pub fn install_model(&self, model_id: &str) -> anyhow::Result<ModelInfo> {
1221        ai::install_model(self.storage.connection(), model_id)
1222    }
1223
1224    pub fn reconcile(
1225        &self,
1226        limit: Option<u32>,
1227        batch_size: Option<u32>,
1228    ) -> anyhow::Result<ReconcileReport> {
1229        ai::reconcile(self.storage.connection(), limit, batch_size)
1230    }
1231
1232    pub fn reconcile_plan(&self) -> anyhow::Result<ReconcilePlan> {
1233        ai::reconcile_plan(self.storage.connection())
1234    }
1235
1236    pub fn reconcile_with_progress(
1237        &self,
1238        limit: Option<u32>,
1239        batch_size: Option<u32>,
1240        force: bool,
1241        progress: impl FnMut(ai::ReconcileProgress),
1242    ) -> anyhow::Result<ReconcileReport> {
1243        ai::reconcile_with_progress(self.storage.connection(), limit, batch_size, force, progress)
1244    }
1245
1246    pub fn reconcile_with_options_progress(
1247        &self,
1248        options: ai::ReconcileOptions,
1249        progress: impl FnMut(ai::ReconcileProgress),
1250    ) -> anyhow::Result<ReconcileReport> {
1251        ai::reconcile_with_options_progress(self.storage.connection(), options, progress)
1252    }
1253
1254    pub fn current_embedding_count(&self, model_id: &str) -> anyhow::Result<u64> {
1255        ai::current_embedding_count(self.storage.connection(), model_id)
1256    }
1257
1258    pub fn heal_index(&self, limit: Option<u32>) -> anyhow::Result<HealIndexReport> {
1259        let Some(root) = self.storage.source_root() else {
1260            anyhow::bail!("heal_index requires source_root metadata; run `rag-rat index` first");
1261        };
1262        let indexed_files = self.indexed_files()?;
1263        let max_repairs = limit.map(usize::try_from).transpose()?.unwrap_or(usize::MAX);
1264        let mut report = HealIndexReport {
1265            checked_files: 0,
1266            healed_files: 0,
1267            removed_files: 0,
1268            skipped_files: 0,
1269            fts_fresh: false,
1270            message: None,
1271        };
1272
1273        for file in indexed_files {
1274            report.checked_files += 1;
1275            let path = Path::new(&file.path);
1276            let full_path = root.join(path);
1277            let Ok(text) = fs::read_to_string(&full_path) else {
1278                if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1279                    >= max_repairs
1280                {
1281                    report.message =
1282                        Some("limit reached; rerun heal_index to continue".to_string());
1283                    break;
1284                }
1285                self.mark_file_deleted(path)?;
1286                report.removed_files += 1;
1287                continue;
1288            };
1289            let sha256 = hex_sha256(text.as_bytes());
1290            if sha256 == file.sha256 {
1291                report.skipped_files += 1;
1292                continue;
1293            }
1294            if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1295                >= max_repairs
1296            {
1297                report.message = Some("limit reached; rerun heal_index to continue".to_string());
1298                break;
1299            }
1300            self.heal_file(path)?;
1301            report.healed_files += 1;
1302        }
1303
1304        if report.healed_files > 0 || report.removed_files > 0 {
1305            self.sync_fts()?;
1306        } else {
1307            self.ensure_fts_fresh()?;
1308        }
1309        report.fts_fresh = !self.fts_dirty()?;
1310        Ok(report)
1311    }
1312
1313    pub fn ffi_surface(&self, limit: u32) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1314        crate::query::impact::ffi_surface(self.storage.connection(), limit)
1315    }
1316
1317    pub fn find_callers(
1318        &self,
1319        symbol: &str,
1320        limit: u32,
1321    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1322        crate::query::graph::traverse(self.storage.connection(), symbol, true, limit)
1323    }
1324
1325    pub fn find_callers_with_options(
1326        &self,
1327        symbol: &str,
1328        limit: u32,
1329        options: &crate::query::graph::GraphTraversalOptions,
1330    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1331        let options = self.graph_options_with_logical_group(options)?;
1332        crate::query::graph::traverse_with_options(
1333            self.storage.connection(),
1334            symbol,
1335            true,
1336            limit,
1337            &options,
1338        )
1339    }
1340
1341    pub fn trace_callees(
1342        &self,
1343        symbol: &str,
1344        limit: u32,
1345    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1346        crate::query::graph::traverse(self.storage.connection(), symbol, false, limit)
1347    }
1348
1349    pub fn trace_callees_with_options(
1350        &self,
1351        symbol: &str,
1352        limit: u32,
1353        options: &crate::query::graph::GraphTraversalOptions,
1354    ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1355        let options = self.graph_options_with_logical_group(options)?;
1356        crate::query::graph::traverse_with_options(
1357            self.storage.connection(),
1358            symbol,
1359            false,
1360            limit,
1361            &options,
1362        )
1363    }
1364
1365    pub fn graph_traversal_report(
1366        &self,
1367        tool: &str,
1368        symbol: &crate::query::symbol::SymbolHit,
1369        reverse: bool,
1370        limit: u32,
1371        options: &crate::query::graph::GraphTraversalOptions,
1372    ) -> anyhow::Result<crate::query::graph::GraphTraversalReport> {
1373        let options = self.graph_options_with_logical_group(options)?;
1374        let results = crate::query::graph::traverse_with_options(
1375            self.storage.connection(),
1376            &symbol.qualified_name,
1377            reverse,
1378            limit,
1379            &options,
1380        )?;
1381        let summary = crate::query::graph::traversal_summary(
1382            self.storage.connection(),
1383            &symbol.qualified_name,
1384            reverse,
1385            limit,
1386            &options,
1387            results.len(),
1388        )?;
1389        let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1390        let mut paths = BTreeSet::new();
1391        paths.insert(symbol.path.clone());
1392        for result in &results {
1393            if let Some(callsite) = &result.callsite {
1394                paths.insert(callsite.path.clone());
1395            }
1396        }
1397        let mut coverage = self.graph_coverage(paths)?;
1398        if summary.unresolved > 0 {
1399            coverage.known_index_gaps.push(format!(
1400                "{} unresolved qualified callsites match the requested final segment but are not verified to this symbol",
1401                summary.unresolved
1402            ));
1403        }
1404        Ok(crate::query::graph::GraphTraversalReport {
1405            query: crate::query::graph::GraphTraversalQuery {
1406                tool: tool.to_string(),
1407                symbol_id: Some(symbol.symbol_id),
1408                logical_symbol_id: options.logical_symbol_id,
1409                symbol_path: symbol.qualified_name.clone(),
1410                resolution: options.resolution_mode.as_str().to_string(),
1411            },
1412            logical_symbol,
1413            variants,
1414            summary,
1415            coverage,
1416            results,
1417        })
1418    }
1419
1420    pub fn compare_graph_to_text(
1421        &self,
1422        symbol: &crate::query::symbol::SymbolHit,
1423        pattern: &str,
1424        limit: u32,
1425        options: &crate::query::graph::GraphTraversalOptions,
1426        include_tests: bool,
1427    ) -> anyhow::Result<crate::query::graph::CompareGraphTextReport> {
1428        let regex = Regex::new(pattern)?;
1429        let options = self.graph_options_with_logical_group(options)?;
1430        let mut graph_edges = crate::query::graph::traverse_with_options(
1431            self.storage.connection(),
1432            &symbol.qualified_name,
1433            true,
1434            limit,
1435            &options,
1436        )?;
1437        if !include_tests {
1438            graph_edges.retain(|edge| {
1439                edge.callsite.as_ref().is_none_or(|callsite| !is_test_like_path(&callsite.path))
1440            });
1441        }
1442        let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1443        let text_hits = self.regex_hits(pattern, &regex, include_tests)?;
1444        let text_by_location = text_hits
1445            .iter()
1446            .map(|hit| ((hit.path.clone(), hit.line), hit))
1447            .collect::<BTreeMap<_, _>>();
1448        let graph_by_location = graph_edges
1449            .iter()
1450            .filter_map(|edge| {
1451                edge.callsite
1452                    .as_ref()
1453                    .map(|callsite| ((callsite.path.clone(), callsite.line), edge))
1454            })
1455            .collect::<BTreeMap<_, _>>();
1456
1457        let mut paths = BTreeSet::new();
1458        paths.insert(symbol.path.clone());
1459        for hit in &text_hits {
1460            paths.insert(hit.path.clone());
1461        }
1462        for edge in &graph_edges {
1463            if let Some(callsite) = &edge.callsite {
1464                paths.insert(callsite.path.clone());
1465            }
1466        }
1467
1468        let parser_failure_paths = self
1469            .parser_failure_paths()?
1470            .into_iter()
1471            .map(|failure| failure.path)
1472            .collect::<BTreeSet<_>>();
1473        let mut matched_hits = Vec::new();
1474        let mut text_only_hits = Vec::new();
1475        let mut likely_parser_gaps = Vec::new();
1476        for hit in &text_hits {
1477            if let Some(edge) = graph_by_location.get(&(hit.path.clone(), hit.line)) {
1478                matched_hits.push(crate::query::graph::MatchedGraphTextHit {
1479                    path: hit.path.clone(),
1480                    line: hit.line,
1481                    text: hit.text.clone(),
1482                    target: edge.target.clone(),
1483                    edge_kind: edge.edge_kind.clone(),
1484                    confidence: edge.confidence.clone(),
1485                    resolution: edge.resolution.clone(),
1486                });
1487            } else {
1488                let gap_kind = classify_text_only_hit(&hit.path, &hit.text, &parser_failure_paths);
1489                let text_only_hit = crate::query::graph::TextOnlyHit {
1490                    path: hit.path.clone(),
1491                    line: hit.line,
1492                    text: hit.text.clone(),
1493                    reason: if gap_kind == "parser_call_extraction" || gap_kind == "parser_failure"
1494                    {
1495                        "no graph edge extracted"
1496                    } else {
1497                        "text mention outside graph-call evidence"
1498                    }
1499                    .to_string(),
1500                    likely_gap: gap_kind.to_string(),
1501                };
1502                if is_likely_parser_gap_kind(gap_kind) {
1503                    likely_parser_gaps.push(text_only_hit.clone());
1504                }
1505                text_only_hits.push(text_only_hit);
1506            }
1507        }
1508
1509        let mut graph_only_edges = Vec::new();
1510        let mut likely_false_positives = Vec::new();
1511        for edge in &graph_edges {
1512            let Some(callsite) = &edge.callsite else {
1513                continue;
1514            };
1515            if text_by_location.contains_key(&(callsite.path.clone(), callsite.line)) {
1516                continue;
1517            }
1518            let current_line = self.current_line_text(&callsite.path, callsite.line)?;
1519            let graph_only = crate::query::graph::GraphOnlyEdge {
1520                path: callsite.path.clone(),
1521                line: callsite.line,
1522                target: edge.target.clone(),
1523                edge_kind: edge.edge_kind.clone(),
1524                confidence: edge.confidence.clone(),
1525                resolution: edge.resolution.clone(),
1526                evidence: edge.evidence.clone(),
1527                reason: "graph edge exists but pattern did not match text".to_string(),
1528                likely_reason: graph_only_reason(edge, current_line.as_deref()),
1529            };
1530            if is_likely_false_positive_graph_only(edge, &graph_only) {
1531                likely_false_positives.push(graph_only.clone());
1532            }
1533            graph_only_edges.push(graph_only);
1534        }
1535        let complete = likely_parser_gaps.is_empty() && likely_false_positives.is_empty();
1536        let recommended_fallback =
1537            recommended_graph_text_fallback(&likely_parser_gaps, &graph_only_edges);
1538        let pattern_match_mode = compare_pattern_match_mode(pattern, &symbol.name);
1539        let mut warnings = Vec::new();
1540        if pattern_match_mode == "substring_identifier" {
1541            warnings.push(format!(
1542                "pattern may match identifiers that merely contain `{}`; use an identifier boundary or escaped call suffix for exact text auditing",
1543                symbol.name
1544            ));
1545        }
1546
1547        Ok(crate::query::graph::CompareGraphTextReport {
1548            query: crate::query::graph::CompareGraphTextQuery {
1549                symbol_id: Some(symbol.symbol_id),
1550                logical_symbol_id: options.logical_symbol_id,
1551                symbol_path: symbol.qualified_name.clone(),
1552                pattern: pattern.to_string(),
1553                resolution: options.resolution_mode.as_str().to_string(),
1554                include_tests,
1555            },
1556            logical_symbol,
1557            variants,
1558            summary: crate::query::graph::CompareGraphTextSummary {
1559                graph_hits: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1560                graph_edges: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1561                text_hits: u64::try_from(text_hits.len()).unwrap_or(u64::MAX),
1562                matched: u64::try_from(matched_hits.len()).unwrap_or(u64::MAX),
1563                graph_only: u64::try_from(graph_only_edges.len()).unwrap_or(u64::MAX),
1564                text_only: u64::try_from(text_only_hits.len()).unwrap_or(u64::MAX),
1565                text_mentions: u64::try_from(text_only_hits.len() - likely_parser_gaps.len())
1566                    .unwrap_or(u64::MAX),
1567                likely_parser_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1568                likely_false_positives: u64::try_from(likely_false_positives.len())
1569                    .unwrap_or(u64::MAX),
1570                likely_index_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1571                complete,
1572                recommended_fallback,
1573                pattern_match_mode,
1574                warnings,
1575            },
1576            coverage: self.graph_coverage(paths)?,
1577            matched_hits,
1578            text_only_hits,
1579            graph_only_edges,
1580            likely_parser_gaps,
1581            likely_false_positives,
1582        })
1583    }
1584
1585    fn graph_logical_symbol(
1586        &self,
1587        logical_symbol_id: Option<i64>,
1588    ) -> anyhow::Result<(
1589        Option<crate::query::graph::LogicalSymbol>,
1590        Vec<crate::query::graph::LogicalSymbolVariant>,
1591    )> {
1592        let Some(logical_symbol_id) = logical_symbol_id else {
1593            return Ok((None, Vec::new()));
1594        };
1595        let Some(logical) = crate::query::symbol::lookup_logical_by_id(
1596            self.storage.connection(),
1597            logical_symbol_id,
1598        )?
1599        else {
1600            return Ok((None, Vec::new()));
1601        };
1602        let variants = crate::query::symbol::logical_members(
1603            self.storage.connection(),
1604            logical.logical_symbol_id,
1605        )?
1606        .into_iter()
1607        .map(|member| crate::query::graph::LogicalSymbolVariant {
1608            symbol_id: member.symbol_id,
1609            cfg_expr: member.cfg_expr,
1610            signature_hash: member.signature_hash,
1611            start_line: member.start_line,
1612            end_line: member.end_line,
1613        })
1614        .collect::<Vec<_>>();
1615        Ok((
1616            Some(crate::query::graph::LogicalSymbol {
1617                logical_symbol_id: logical.logical_symbol_id,
1618                qualified_name: logical.qualified_name,
1619                variant_count: logical.variant_count,
1620                group_reason: logical.group_reason,
1621            }),
1622            variants,
1623        ))
1624    }
1625
1626    fn graph_options_with_logical_group(
1627        &self,
1628        options: &crate::query::graph::GraphTraversalOptions,
1629    ) -> anyhow::Result<crate::query::graph::GraphTraversalOptions> {
1630        if options.logical_symbol_id.is_some() {
1631            return Ok(options.clone());
1632        }
1633        let Some(symbol_id) = options.symbol_id else {
1634            return Ok(options.clone());
1635        };
1636        let Some(logical) =
1637            crate::query::symbol::logical_for_symbol_id(self.storage.connection(), symbol_id)?
1638        else {
1639            return Ok(options.clone());
1640        };
1641        let mut options = options.clone();
1642        options.logical_symbol_id = Some(logical.logical_symbol_id);
1643        Ok(options)
1644    }
1645
1646    fn local_symbol_context_hits(
1647        &self,
1648        symbol: &crate::query::symbol::SymbolHit,
1649        limit: u32,
1650    ) -> anyhow::Result<Vec<SearchHit>> {
1651        let mut stmt = self.storage.connection().prepare(
1652            "
1653            SELECT chunks.id, files.path, files.language, files.kind,
1654                   chunks.start_line, chunks.end_line, chunks.symbol_path, chunks.text
1655            FROM chunks
1656            JOIN files ON files.id = chunks.file_id
1657            WHERE files.path = ?1
1658              AND (
1659                chunks.symbol_path = ?2
1660                OR chunks.symbol_path LIKE ?3
1661                OR chunks.text LIKE ?4
1662              )
1663            ORDER BY
1664              CASE
1665                WHEN chunks.symbol_path = ?2 THEN 0
1666                WHEN chunks.symbol_path LIKE ?3 THEN 1
1667                ELSE 2
1668              END,
1669              chunks.start_line
1670            LIMIT ?5
1671            ",
1672        )?;
1673        let rows = stmt.query_map(
1674            params![
1675                symbol.path,
1676                symbol.qualified_name,
1677                format!("%{}%", symbol.name),
1678                format!("%{}%", symbol.name),
1679                i64::from(limit.max(1)),
1680            ],
1681            |row| {
1682                let text: String = row.get(7)?;
1683                Ok(SearchHit {
1684                    chunk_id: row.get(0)?,
1685                    path: row.get(1)?,
1686                    language: row.get(2)?,
1687                    kind: row.get(3)?,
1688                    start_line: row.get(4)?,
1689                    end_line: row.get(5)?,
1690                    symbol_path: row.get(6)?,
1691                    score: 1.0,
1692                    summary: bounded_summary(&text),
1693                    graph: None,
1694                    score_components: None,
1695                })
1696            },
1697        )?;
1698        let mut hits = Vec::new();
1699        for row in rows {
1700            hits.push(row?);
1701        }
1702        Ok(hits)
1703    }
1704
1705    pub fn impact_surface(
1706        &self,
1707        query: &str,
1708        limit: u32,
1709    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1710        crate::query::impact::impact_surface(self.storage.connection(), query, limit)
1711    }
1712
1713    pub fn impact_surface_with_options(
1714        &self,
1715        query: &str,
1716        limit: u32,
1717        resolution_mode: crate::query::graph::GraphResolutionMode,
1718    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1719        crate::query::impact::impact_surface_with_options(
1720            self.storage.connection(),
1721            query,
1722            limit,
1723            resolution_mode,
1724        )
1725    }
1726
1727    pub fn impact_surface_for_selected_symbol(
1728        &self,
1729        symbol: &crate::query::symbol::SymbolHit,
1730        limit: u32,
1731        resolution_mode: crate::query::graph::GraphResolutionMode,
1732    ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1733        crate::query::impact::impact_surface_for_symbol(
1734            self.storage.connection(),
1735            symbol,
1736            limit,
1737            resolution_mode,
1738        )
1739    }
1740
1741    pub fn impact_surface_report_for_selected_symbol(
1742        &self,
1743        symbol: &crate::query::symbol::SymbolHit,
1744        limit: u32,
1745        options: &crate::query::impact::ImpactSurfaceOptions,
1746    ) -> anyhow::Result<crate::query::impact::ImpactSurfaceReport> {
1747        crate::query::impact::impact_surface_report_for_symbol(
1748            self.storage.connection(),
1749            symbol,
1750            limit,
1751            options,
1752        )
1753    }
1754
1755    pub fn repo_brief(
1756        &self,
1757        options: crate::query::repo_brief::RepoBriefOptions,
1758    ) -> anyhow::Result<crate::query::repo_brief::RepoBrief> {
1759        crate::query::repo_brief::repo_brief(self.storage.connection(), options)
1760    }
1761
1762    pub fn memory_create(
1763        &self,
1764        request: crate::query::memory::RepoMemoryCreate,
1765    ) -> anyhow::Result<crate::query::memory::RepoMemoryCreateResult> {
1766        crate::query::memory::create_memory(self.storage.connection(), request)
1767    }
1768
1769    pub fn memory_update(
1770        &self,
1771        update: crate::query::memory::RepoMemoryUpdate,
1772    ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1773        crate::query::memory::update_memory(self.storage.connection(), update)
1774    }
1775
1776    pub fn memory_mark_obsolete(
1777        &self,
1778        memory_id: &str,
1779    ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1780        crate::query::memory::mark_obsolete(self.storage.connection(), memory_id)
1781    }
1782
1783    pub fn memory_search(
1784        &self,
1785        query: &str,
1786        limit: u32,
1787    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1788        crate::query::memory::memory_search(self.storage.connection(), query, limit)
1789    }
1790
1791    pub fn memory_for_symbol(
1792        &self,
1793        symbol: &crate::query::symbol::SymbolHit,
1794        limit: u32,
1795    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1796        crate::query::memory::memories_for_symbol(self.storage.connection(), symbol, limit)
1797    }
1798
1799    pub fn memory_for_path(
1800        &self,
1801        path: &str,
1802        limit: u32,
1803    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1804        crate::query::memory::memories_for_path(self.storage.connection(), path, limit)
1805    }
1806
1807    pub fn memory_for_edges(
1808        &self,
1809        edge_ids: &[i64],
1810        limit: u32,
1811    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1812        crate::query::memory::memories_for_edges(self.storage.connection(), edge_ids, limit)
1813    }
1814
1815    pub fn memory_evidence_for_symbol_and_edges(
1816        &self,
1817        symbol: &crate::query::symbol::SymbolHit,
1818        edge_ids: &[i64],
1819        limit: u32,
1820    ) -> anyhow::Result<crate::query::memory::RepoMemoryEvidence> {
1821        crate::query::memory::memory_evidence_for_symbol_and_edges(
1822            self.storage.connection(),
1823            symbol,
1824            edge_ids,
1825            limit,
1826        )
1827    }
1828
1829    pub fn memory_for_call_path_hash(
1830        &self,
1831        edge_sequence_hash: &str,
1832        limit: u32,
1833    ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1834        crate::query::memory::memories_for_call_path_hash(
1835            self.storage.connection(),
1836            edge_sequence_hash,
1837            limit,
1838        )
1839    }
1840
1841    pub fn memory_validate(
1842        &self,
1843    ) -> anyhow::Result<crate::query::memory::RepoMemoryValidationReport> {
1844        crate::query::memory::validate_memories(self.storage.connection())
1845    }
1846
1847    pub fn rebuild_fts(&self) -> anyhow::Result<()> {
1848        schema::rebuild_fts(self.storage.connection())?;
1849        self.record_content_revision()?;
1850        self.record_fts_current()?;
1851        self.set_meta("fts_dirty", "false")?;
1852        Ok(())
1853    }
1854
1855    pub fn sync_fts(&self) -> anyhow::Result<()> {
1856        self.record_content_revision()?;
1857        self.record_fts_current()?;
1858        self.set_meta("fts_dirty", "false")?;
1859        Ok(())
1860    }
1861
1862    fn record_fts_current(&self) -> anyhow::Result<()> {
1863        self.set_meta("fts_synced_at_ms", &now_ms().to_string())?;
1864        let revision = self.content_revision()?;
1865        self.set_meta("fts_source_revision", &revision)?;
1866        Ok(())
1867    }
1868
1869    fn record_content_revision(&self) -> anyhow::Result<String> {
1870        let revision = self.content_revision()?;
1871        self.set_meta("content_revision", &revision)?;
1872        Ok(revision)
1873    }
1874
1875    pub fn heal_file(&self, path: &Path) -> anyhow::Result<()> {
1876        let Some(root) = self.storage.source_root() else {
1877            anyhow::bail!("index has no source_root metadata; rebuild required");
1878        };
1879        let row = self.file_row(path)?;
1880        let full_path = root.join(path);
1881        let text = fs::read_to_string(&full_path)?;
1882
1883        let changes = git_changed_paths(root).unwrap_or_default();
1884        let is_dirty = changes.changed.contains(path);
1885        let has_base_commit = !self.active_commit_sha.is_empty();
1886        let scope = if !has_base_commit || is_dirty {
1887            FileScope::worktree(self.active_worktree_id.clone())
1888        } else {
1889            FileScope::commit(self.active_commit_sha.clone())
1890        };
1891        self.remove_file_in_scope(path, &scope.commit_sha, &scope.worktree_id)?;
1892
1893        self.index_file(
1894            path,
1895            row.language,
1896            row.kind,
1897            file_metadata_ms(&full_path)?,
1898            &text,
1899            &scope,
1900        )?;
1901        self.rebuild_logical_symbols()?;
1902        self.resolve_edges()
1903    }
1904
1905    fn index_file(
1906        &self,
1907        path: &Path,
1908        language: Language,
1909        kind: TargetKind,
1910        modified_at_ms: i64,
1911        text: &str,
1912        scope: &FileScope,
1913    ) -> anyhow::Result<()> {
1914        if language != Language::Markdown && kind != TargetKind::Generated {
1915            if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
1916                // Large source files are intentionally coarse-indexed to keep full-repo indexing
1917                // responsive. This is not a parser failure.
1918            } else if let Some(message) = parser::parse_error(path, language, text)
1919                .unwrap_or_else(|err| Some(err.to_string()))
1920            {
1921                self.insert_parser_failure(path, language, &message)?;
1922            }
1923        }
1924        let sha256 = hex_sha256(text.as_bytes());
1925        let file_id = self.storage.connection().query_row(
1926            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
1927             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
1928             RETURNING id",
1929            params![
1930                path_string(path),
1931                language.as_str(),
1932                kind.as_str(),
1933                sha256,
1934                modified_at_ms,
1935                matches!(kind, TargetKind::Generated),
1936                now_ms(),
1937                sha256,
1938                &scope.commit_sha,
1939                &scope.worktree_id,
1940            ],
1941            |row| row.get::<_, i64>(0),
1942        )?;
1943        let chunks = if kind == TargetKind::Generated {
1944            chunker::generated_chunks_for_file(path, text)
1945        } else {
1946            chunker::chunks_for_file(path, language, text)
1947        };
1948        let symbols =
1949            if kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
1950                Vec::new()
1951            } else {
1952                symbols::symbols_for_file(path, language, text)
1953            };
1954        self.insert_chunks(file_id, &sha256, &chunks, text)?;
1955        self.insert_symbols(file_id, language, &symbols)?;
1956        if kind != TargetKind::Generated && text.len() <= edges::MAX_GRAPH_PARSE_BYTES {
1957            edges::index_file_edges(self.storage.connection(), file_id, path, language, text)?;
1958        }
1959        self.mark_fts_dirty()?;
1960        Ok(())
1961    }
1962
1963    fn insert_prepared_file(&self, prepared_file: &PreparedIndexFile) -> anyhow::Result<()> {
1964        let file = &prepared_file.file;
1965        let prepared = match &prepared_file.prepared {
1966            Ok(prepared) => prepared,
1967            Err(err) => {
1968                self.insert_parser_failure(&file.relative_path, file.language, &err.to_string())?;
1969                return Ok(());
1970            },
1971        };
1972        if let Some(message) = &prepared.parser_failure {
1973            self.insert_parser_failure(&file.relative_path, file.language, message)?;
1974        }
1975        let file_id = self.storage.connection().query_row(
1976            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
1977             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
1978             RETURNING id",
1979            params![
1980                path_string(&file.relative_path),
1981                file.language.as_str(),
1982                file.kind.as_str(),
1983                prepared.sha256,
1984                prepared.modified_at_ms,
1985                matches!(file.kind, TargetKind::Generated),
1986                now_ms(),
1987                prepared.sha256,
1988                file.commit_sha,
1989                file.worktree_id,
1990            ],
1991            |row| row.get::<_, i64>(0),
1992        )?;
1993        self.insert_chunks(file_id, &prepared.sha256, &prepared.chunks, &prepared.text)?;
1994        self.insert_symbols(file_id, file.language, &prepared.symbols)?;
1995        if file.kind != TargetKind::Generated && prepared.text.len() <= edges::MAX_GRAPH_PARSE_BYTES
1996        {
1997            edges::index_file_edges(
1998                self.storage.connection(),
1999                file_id,
2000                &file.relative_path,
2001                file.language,
2002                &prepared.text,
2003            )?;
2004        }
2005        self.mark_fts_dirty()?;
2006        Ok(())
2007    }
2008
2009    fn insert_chunks(
2010        &self,
2011        file_id: i64,
2012        source_revision: &str,
2013        chunks: &[Chunk],
2014        full_text: &str,
2015    ) -> anyhow::Result<()> {
2016        let (path, language, kind) = self.storage.connection().query_row(
2017            "SELECT path, language, kind FROM main.files WHERE id = ?1",
2018            [file_id],
2019            |row| {
2020                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?))
2021            },
2022        )?;
2023        for chunk in chunks {
2024            let anchor =
2025                anchors::anchor_for_text(&chunk.text, chunk.start_line, chunk.end_line, full_text);
2026            let embedding_policy = ai::embedding_policy_for_chunk(
2027                Path::new(&path),
2028                &language,
2029                &kind,
2030                chunk.kind,
2031                chunk.symbol_path.as_deref(),
2032                &chunk.text,
2033                ai::DEFAULT_MAX_EMBEDDING_CHARS,
2034            );
2035            self.storage.connection().execute(
2036                "INSERT INTO chunks(file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line, text, text_hash,
2037                                    source_revision, anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2038                                    start_context_hash, end_context_hash, context_radius, embedding_policy, embedding_priority)
2039                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19)",
2040                params![
2041                    file_id,
2042                    chunk.kind,
2043                    chunk.symbol_path,
2044                    i64::try_from(chunk.start_byte)?,
2045                    i64::try_from(chunk.end_byte)?,
2046                    i64::try_from(chunk.start_line)?,
2047                    i64::try_from(chunk.end_line)?,
2048                    chunk.text,
2049                    hex_sha256(chunk.text.as_bytes()),
2050                    source_revision,
2051                    anchor.version,
2052                    anchor.normalized_hash,
2053                    anchor.start_boundary_hash,
2054                    anchor.end_boundary_hash,
2055                    anchor.start_context_hash,
2056                    anchor.end_context_hash,
2057                    anchor.context_radius,
2058                    embedding_policy.policy,
2059                    embedding_policy.priority,
2060                ],
2061            )?;
2062            let chunk_id = self.storage.connection().last_insert_rowid();
2063            self.storage.connection().execute(
2064                "INSERT INTO chunk_fts(rowid, text) VALUES (?1, ?2)",
2065                params![chunk_id, chunk.text],
2066            )?;
2067        }
2068        Ok(())
2069    }
2070
2071    fn insert_symbols(
2072        &self,
2073        file_id: i64,
2074        language: Language,
2075        symbols: &[Symbol],
2076    ) -> anyhow::Result<()> {
2077        for symbol in symbols {
2078            self.storage.connection().execute(
2079                "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs)
2080                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
2081                params![
2082                    file_id,
2083                    language.as_str(),
2084                    symbol.name,
2085                    symbol.qualified_name,
2086                    symbol.kind,
2087                    i64::try_from(symbol.start_byte)?,
2088                    i64::try_from(symbol.end_byte)?,
2089                    symbol.signature,
2090                    symbol.docs,
2091                ],
2092            )?;
2093            let symbol_id = self.storage.connection().last_insert_rowid();
2094            for fact in &symbol.facts {
2095                self.storage.connection().execute(
2096                    "INSERT OR IGNORE INTO symbol_facts(symbol_id, fact_kind, fact_value)
2097                     VALUES (?1, ?2, ?3)",
2098                    params![symbol_id, fact.kind, fact.value],
2099                )?;
2100            }
2101        }
2102        Ok(())
2103    }
2104
2105    fn write_git_meta(&self, root: &Path) -> anyhow::Result<()> {
2106        self.set_meta("git_commit", &git_output(root, &["rev-parse", "HEAD"]).unwrap_or_default())?;
2107        let dirty = !git_output(root, &["status", "--porcelain"]).unwrap_or_default().is_empty();
2108        self.set_meta("git_dirty", if dirty { "true" } else { "false" })?;
2109        Ok(())
2110    }
2111
2112    fn apply_prepared_git_history(
2113        &self,
2114        root: &Path,
2115        handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
2116    ) -> anyhow::Result<GitHistoryIndexStatus> {
2117        let prepared = join_git_history_prepare(handle)?;
2118        git_history::apply_prepared(self.storage.connection(), root, prepared)
2119    }
2120
2121    fn git_history_status(&self) -> anyhow::Result<GitHistoryIndexStatus> {
2122        let Some(root) = self.storage.source_root() else {
2123            return git_history::status(self.storage.connection(), Path::new("."));
2124        };
2125        git_history::status(self.storage.connection(), root)
2126    }
2127
2128    fn github_status(&self) -> anyhow::Result<GitHubStatus> {
2129        github::status(self.storage.connection())
2130    }
2131
2132    fn mark_fts_dirty(&self) -> anyhow::Result<()> {
2133        self.set_meta("fts_dirty", "true")
2134    }
2135
2136    fn resolve_edges(&self) -> anyhow::Result<()> {
2137        edges::resolve_all_edges(self.storage.connection())
2138    }
2139
2140    fn rebuild_logical_symbols(&self) -> anyhow::Result<()> {
2141        self.storage.connection().execute_batch(
2142            "
2143            CREATE TEMP TABLE IF NOT EXISTS logical_symbols_to_rebuild(id INTEGER PRIMARY KEY);
2144            DELETE FROM temp.logical_symbols_to_rebuild;
2145            INSERT OR IGNORE INTO temp.logical_symbols_to_rebuild(id)
2146            SELECT logical_symbol_members.logical_symbol_id
2147            FROM main.logical_symbol_members
2148            JOIN main.symbols ON symbols.id = logical_symbol_members.symbol_id
2149            JOIN files ON files.id = symbols.file_id;
2150            DELETE FROM main.logical_symbol_members
2151            WHERE logical_symbol_id IN (
2152                SELECT id FROM temp.logical_symbols_to_rebuild
2153            );
2154            DELETE FROM main.logical_symbols
2155            WHERE id IN (
2156                SELECT id FROM temp.logical_symbols_to_rebuild
2157            );
2158            DELETE FROM temp.logical_symbols_to_rebuild;
2159            ",
2160        )?;
2161
2162        let mut stmt = self.storage.connection().prepare(
2163            "
2164            SELECT symbols.id, symbols.file_id, files.path, symbols.language, symbols.name,
2165                   symbols.qualified_name, symbols.kind, symbols.start_byte, symbols.end_byte,
2166                   symbols.signature,
2167                   COALESCE((
2168                     SELECT chunks.start_byte
2169                     FROM chunks
2170                     WHERE chunks.file_id = symbols.file_id
2171                       AND symbols.start_byte >= chunks.start_byte
2172                       AND symbols.start_byte < chunks.end_byte
2173                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2174                     LIMIT 1
2175                   ), symbols.start_byte) AS chunk_start_byte,
2176                   COALESCE((
2177                     SELECT chunks.start_line
2178                     FROM chunks
2179                     WHERE chunks.file_id = symbols.file_id
2180                       AND symbols.start_byte >= chunks.start_byte
2181                       AND symbols.start_byte < chunks.end_byte
2182                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2183                     LIMIT 1
2184                   ), 1) AS chunk_start_line,
2185                   COALESCE((
2186                     SELECT chunks.text
2187                     FROM chunks
2188                     WHERE chunks.file_id = symbols.file_id
2189                       AND symbols.start_byte >= chunks.start_byte
2190                       AND symbols.start_byte < chunks.end_byte
2191                     ORDER BY chunks.end_byte - chunks.start_byte ASC
2192                     LIMIT 1
2193                   ), '') AS chunk_text
2194            FROM symbols
2195            JOIN files ON files.id = symbols.file_id
2196            ORDER BY files.path, symbols.language, symbols.qualified_name, symbols.kind,
2197                     symbols.start_byte, symbols.end_byte
2198            ",
2199        )?;
2200        let rows = stmt.query_map([], |row| {
2201            let start_byte = usize::try_from(row.get::<_, i64>(7)?).unwrap_or(0);
2202            let end_byte = usize::try_from(row.get::<_, i64>(8)?).unwrap_or(0);
2203            let chunk_start_byte = usize::try_from(row.get::<_, i64>(10)?).unwrap_or(start_byte);
2204            let chunk_start_line = row.get::<_, i64>(11)?;
2205            let chunk_text: String = row.get(12)?;
2206            let start_line =
2207                symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, start_byte);
2208            let end_line =
2209                symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, end_byte);
2210            Ok(LogicalSymbolMemberRow {
2211                symbol_id: row.get(0)?,
2212                path: row.get(2)?,
2213                language: row.get(3)?,
2214                name: row.get(4)?,
2215                qualified_name: row.get(5)?,
2216                kind: row.get(6)?,
2217                signature: row.get(9)?,
2218                start_line,
2219                end_line,
2220            })
2221        })?;
2222        let mut groups: BTreeMap<LogicalSymbolKey, Vec<LogicalSymbolMemberRow>> = BTreeMap::new();
2223        for row in rows {
2224            let row = row?;
2225            groups.entry(LogicalSymbolKey::from(&row)).or_default().push(row);
2226        }
2227        for (key, members) in groups {
2228            let group_reason = if members.len() > 1 { "cfg_variant" } else { "single" };
2229            self.storage.connection().execute(
2230                "
2231                INSERT INTO logical_symbols(language, path, logical_name, qualified_name, kind, variant_count, group_reason)
2232                VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)
2233                ",
2234                params![
2235                    key.language,
2236                    key.path,
2237                    key.name,
2238                    key.qualified_name,
2239                    key.kind,
2240                    i64::try_from(members.len()).unwrap_or(i64::MAX),
2241                    group_reason,
2242                ],
2243            )?;
2244            let logical_symbol_id = self.storage.connection().last_insert_rowid();
2245            for member in members {
2246                let signature_hash =
2247                    member.signature.as_deref().map(|signature| hex_sha256(signature.as_bytes()));
2248                self.storage.connection().execute(
2249                    "
2250                    INSERT INTO logical_symbol_members(
2251                        logical_symbol_id, symbol_id, cfg_expr, signature_hash, start_line, end_line
2252                    )
2253                    VALUES (?1, ?2, NULL, ?3, ?4, ?5)
2254                    ",
2255                    params![
2256                        logical_symbol_id,
2257                        member.symbol_id,
2258                        signature_hash,
2259                        member.start_line,
2260                        member.end_line,
2261                    ],
2262                )?;
2263            }
2264        }
2265        Ok(())
2266    }
2267
2268    fn graph_coverage(
2269        &self,
2270        paths: BTreeSet<String>,
2271    ) -> anyhow::Result<crate::query::graph::GraphCoverage> {
2272        let indexed_files =
2273            self.storage
2274                .connection()
2275                .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2276        let parser_failure_paths = self.parser_failure_paths()?;
2277        let parser_failures = u64::try_from(parser_failure_paths.len()).unwrap_or(0);
2278        let known_index_gaps = parser_failure_paths
2279            .iter()
2280            .map(|failure| {
2281                format!(
2282                    "{} parser failed for {}: {}",
2283                    failure.language, failure.path, failure.message
2284                )
2285            })
2286            .collect::<Vec<_>>();
2287        let mut stale_files = 0_u64;
2288        let mut parser_coverage_for_paths = Vec::new();
2289        for path in paths {
2290            let Some(row) = self.graph_path_row(&path)? else {
2291                parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2292                    path,
2293                    language: "unknown".to_string(),
2294                    parser_status: "missing_from_index".to_string(),
2295                    graph_status: "missing_from_index".to_string(),
2296                    last_indexed_revision: None,
2297                });
2298                continue;
2299            };
2300            let stale = self.source_path_is_stale(&path, &row.sha256);
2301            if stale {
2302                stale_files += 1;
2303            }
2304            let parser_failed = parser_failure_paths.iter().any(|failure| failure.path == path);
2305            parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2306                path,
2307                language: row.language,
2308                parser_status: if parser_failed { "failed" } else { "ok" }.to_string(),
2309                graph_status: if stale {
2310                    "stale_source"
2311                } else if parser_failed {
2312                    "parser_failed"
2313                } else {
2314                    "ok"
2315                }
2316                .to_string(),
2317                last_indexed_revision: (!row.indexed_revision.is_empty())
2318                    .then_some(row.indexed_revision),
2319            });
2320        }
2321        Ok(crate::query::graph::GraphCoverage {
2322            indexed_files: u64::try_from(indexed_files).unwrap_or(0),
2323            parser_failures,
2324            stale_files,
2325            known_index_gaps,
2326            parser_coverage_for_paths,
2327        })
2328    }
2329
2330    fn graph_path_row(&self, path: &str) -> anyhow::Result<Option<GraphPathRow>> {
2331        self.storage
2332            .connection()
2333            .query_row(
2334                "SELECT language, sha256, indexed_revision FROM files WHERE path = ?1",
2335                [path],
2336                |row| {
2337                    Ok(GraphPathRow {
2338                        language: row.get(0)?,
2339                        sha256: row.get(1)?,
2340                        indexed_revision: row.get(2)?,
2341                    })
2342                },
2343            )
2344            .optional()
2345            .map_err(Into::into)
2346    }
2347
2348    fn source_path_is_stale(&self, path: &str, indexed_sha256: &str) -> bool {
2349        let Some(root) = self.storage.source_root() else {
2350            return false;
2351        };
2352        let Ok(bytes) = fs::read(root.join(path)) else {
2353            return true;
2354        };
2355        hex_sha256(&bytes) != indexed_sha256
2356    }
2357
2358    fn regex_hits(
2359        &self,
2360        pattern: &str,
2361        regex: &Regex,
2362        include_tests: bool,
2363    ) -> anyhow::Result<Vec<crate::query::graph::TextOnlyHit>> {
2364        let Some(root) = self.storage.source_root() else {
2365            anyhow::bail!("cannot compare graph to text: source_root is missing from index_meta");
2366        };
2367        let mut stmt = self.storage.connection().prepare("SELECT path FROM files ORDER BY path")?;
2368        let paths =
2369            stmt.query_map([], |row| row.get::<_, String>(0))?.collect::<Result<Vec<_>, _>>()?;
2370        let mut hits = Vec::new();
2371        for path in paths {
2372            if !include_tests && is_test_like_path(&path) {
2373                continue;
2374            }
2375            let full_path = root.join(&path);
2376            let Ok(text) = fs::read_to_string(&full_path) else {
2377                continue;
2378            };
2379            for (index, line) in text.lines().enumerate() {
2380                if regex.is_match(line) {
2381                    hits.push(crate::query::graph::TextOnlyHit {
2382                        path: path.clone(),
2383                        line: i64::try_from(index + 1).unwrap_or(i64::MAX),
2384                        text: line.trim().to_string(),
2385                        reason: "text pattern matched".to_string(),
2386                        likely_gap: pattern.to_string(),
2387                    });
2388                }
2389            }
2390        }
2391        Ok(hits)
2392    }
2393
2394    fn current_line_text(&self, path: &str, line: i64) -> anyhow::Result<Option<String>> {
2395        let Some(root) = self.storage.source_root() else {
2396            return Ok(None);
2397        };
2398        let Ok(text) = fs::read_to_string(root.join(path)) else {
2399            return Ok(None);
2400        };
2401        let Some(index) = usize::try_from(line.saturating_sub(1)).ok() else {
2402            return Ok(None);
2403        };
2404        Ok(text.lines().nth(index).map(|line| line.trim().to_string()))
2405    }
2406
2407    fn ensure_graph_index_current(&self) -> anyhow::Result<()> {
2408        if self.meta("graph_index_version")?.as_deref() == Some(GRAPH_INDEX_VERSION) {
2409            return Ok(());
2410        }
2411        let Some(root) = self.storage.source_root().map(Path::to_path_buf) else {
2412            return Ok(());
2413        };
2414        self.storage.execute_batch("BEGIN IMMEDIATE TRANSACTION")?;
2415        let result = (|| -> anyhow::Result<()> {
2416            self.storage.connection().execute("DELETE FROM edges", [])?;
2417            let files = self.graph_reindex_files()?;
2418            for file in files {
2419                if file.kind == TargetKind::Generated || file.language == Language::Markdown {
2420                    continue;
2421                }
2422                let full_path = root.join(&file.path);
2423                let Ok(text) = fs::read_to_string(full_path) else {
2424                    continue;
2425                };
2426                if text.len() > edges::MAX_GRAPH_PARSE_BYTES {
2427                    continue;
2428                }
2429                edges::index_file_edges(
2430                    self.storage.connection(),
2431                    file.id,
2432                    Path::new(&file.path),
2433                    file.language,
2434                    &text,
2435                )?;
2436            }
2437            self.resolve_edges()?;
2438            self.mark_graph_index_current()?;
2439            Ok(())
2440        })();
2441        if result.is_err() {
2442            let _ = self.storage.execute_batch("ROLLBACK");
2443        }
2444        result?;
2445        self.storage.execute_batch("COMMIT")?;
2446        Ok(())
2447    }
2448
2449    fn mark_graph_index_current(&self) -> anyhow::Result<()> {
2450        self.set_meta("graph_index_version", GRAPH_INDEX_VERSION)
2451    }
2452
2453    fn set_meta(&self, key: &str, value: &str) -> anyhow::Result<()> {
2454        self.storage.connection().execute(
2455            "INSERT INTO index_meta(key, value) VALUES (?1, ?2)
2456             ON CONFLICT(key) DO UPDATE SET value = excluded.value",
2457            params![key, value],
2458        )?;
2459        Ok(())
2460    }
2461
2462    fn meta(&self, key: &str) -> anyhow::Result<Option<String>> {
2463        meta_for(self.storage.connection(), key)
2464    }
2465
2466    fn insert_parser_failure(
2467        &self,
2468        path: &Path,
2469        language: Language,
2470        message: &str,
2471    ) -> anyhow::Result<()> {
2472        self.storage.connection().execute(
2473            "INSERT INTO parser_failures(path, language, message) VALUES (?1, ?2, ?3)",
2474            params![path_string(path), language.as_str(), message],
2475        )?;
2476        Ok(())
2477    }
2478
2479    fn parser_failure_count(&self) -> anyhow::Result<u64> {
2480        let count = self.storage.connection().query_row(
2481            "SELECT COUNT(*) FROM parser_failures",
2482            [],
2483            |row| row.get::<_, i64>(0),
2484        )?;
2485        Ok(u64::try_from(count).unwrap_or(0))
2486    }
2487
2488    fn parser_failure_paths(&self) -> anyhow::Result<Vec<ParserFailure>> {
2489        let mut stmt = self.storage.connection().prepare(
2490            "SELECT path, language, message FROM parser_failures ORDER BY path, language, message",
2491        )?;
2492        let rows = stmt.query_map([], |row| {
2493            Ok(ParserFailure { path: row.get(0)?, language: row.get(1)?, message: row.get(2)? })
2494        })?;
2495        let mut failures = Vec::new();
2496        for row in rows {
2497            failures.push(row?);
2498        }
2499        Ok(failures)
2500    }
2501
2502    fn search_with_heal(
2503        &self,
2504        query: &str,
2505        limit: u32,
2506        include_generated: bool,
2507        allow_heal: bool,
2508        explain: bool,
2509        options: SearchOptions,
2510    ) -> anyhow::Result<Vec<SearchHit>> {
2511        let hits = crate::search::lexical::search_with_options(
2512            self.storage.connection(),
2513            query,
2514            limit,
2515            include_generated,
2516            explain,
2517            options,
2518        )?;
2519        if !allow_heal {
2520            return Ok(hits);
2521        }
2522        let stale = self.stale_hit_paths(&hits)?;
2523        if stale.is_empty() {
2524            return Ok(hits);
2525        }
2526        if stale.len() > MAX_AUTO_HEAL_FILES_PER_CALL {
2527            anyhow::bail!(IndexError::NeedsReindex {
2528                stale_files: stale.len(),
2529                cap: MAX_AUTO_HEAL_FILES_PER_CALL,
2530            });
2531        }
2532        for path in stale {
2533            self.heal_file(Path::new(&path))?;
2534        }
2535        self.sync_fts()?;
2536        self.search_with_heal(query, limit, include_generated, false, explain, options)
2537    }
2538
2539    fn stale_hit_paths(&self, hits: &[SearchHit]) -> anyhow::Result<Vec<String>> {
2540        let Some(root) = self.storage.source_root() else {
2541            return Ok(Vec::new());
2542        };
2543        let mut stale = Vec::new();
2544        let mut seen = BTreeSet::new();
2545        for hit in hits {
2546            if !seen.insert(hit.path.clone()) {
2547                continue;
2548            }
2549            let source_path = root.join(&hit.path);
2550            let Ok(text) = fs::read_to_string(source_path) else {
2551                stale.push(hit.path.clone());
2552                continue;
2553            };
2554            let chunk = crate::query::read_chunk(self.storage.connection(), hit.chunk_id)?;
2555            let Some(chunk) = chunk else {
2556                stale.push(hit.path.clone());
2557                continue;
2558            };
2559            let anchor = self.chunk_anchor(hit.chunk_id)?;
2560            let status = anchors::validate(
2561                &chunk.text,
2562                usize::try_from(chunk.start_line).unwrap_or(1),
2563                usize::try_from(chunk.end_line).unwrap_or(1),
2564                &anchor,
2565                &text,
2566            );
2567            if !matches!(status, AnchorStatus::Exact) {
2568                stale.push(hit.path.clone());
2569            }
2570        }
2571        Ok(stale)
2572    }
2573
2574    fn chunk_anchor(&self, chunk_id: i64) -> anyhow::Result<ChunkAnchor> {
2575        Ok(self.storage.connection().query_row(
2576            "
2577            SELECT anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2578                   start_context_hash, end_context_hash, context_radius
2579            FROM chunks WHERE id = ?1
2580            ",
2581            [chunk_id],
2582            |row| {
2583                Ok(ChunkAnchor {
2584                    version: row.get(0)?,
2585                    normalized_hash: row.get(1)?,
2586                    start_boundary_hash: row.get(2)?,
2587                    end_boundary_hash: row.get(3)?,
2588                    start_context_hash: row.get(4)?,
2589                    end_context_hash: row.get(5)?,
2590                    context_radius: row.get(6)?,
2591                })
2592            },
2593        )?)
2594    }
2595
2596    fn mark_file_deleted(&self, path: &Path) -> anyhow::Result<()> {
2597        let path = path_string(path);
2598        self.remove_file_in_scope(Path::new(&path), "", &self.active_worktree_id)?;
2599        self.storage.connection().execute(
2600            "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2601             VALUES (?1, 'unknown', 'deleted', '', 0, 0, ?2, '', '', ?3)
2602             ON CONFLICT(path, commit_sha, worktree_id) DO UPDATE SET
2603                kind = 'deleted',
2604                sha256 = '',
2605                modified_at_ms = 0,
2606                indexed_at_ms = excluded.indexed_at_ms",
2607            params![path, now_ms(), self.active_worktree_id],
2608        )?;
2609        self.mark_fts_dirty()?;
2610        Ok(())
2611    }
2612
2613    fn remove_file_in_scope(
2614        &self,
2615        path: &Path,
2616        commit_sha: &str,
2617        worktree_id: &str,
2618    ) -> anyhow::Result<()> {
2619        let path = path_string(path);
2620        self.storage.connection().execute(
2621            "UPDATE edges
2622             SET to_symbol_id = NULL,
2623                 confidence = 'NameOnly'
2624             WHERE to_symbol_id IN (
2625                 SELECT symbols.id FROM symbols
2626                 JOIN main.files ON main.files.id = symbols.file_id
2627                 WHERE main.files.path = ?1
2628                   AND main.files.commit_sha = ?2
2629                   AND main.files.worktree_id = ?3
2630             )",
2631            params![path, commit_sha, worktree_id],
2632        )?;
2633        self.storage.connection().execute(
2634            "DELETE FROM edges
2635             WHERE source_file_id IN (
2636                    SELECT id FROM main.files
2637                    WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2638                )
2639                OR from_symbol_id IN (
2640                    SELECT symbols.id FROM symbols
2641                    JOIN main.files ON main.files.id = symbols.file_id
2642                    WHERE main.files.path = ?1
2643                      AND main.files.commit_sha = ?2
2644                      AND main.files.worktree_id = ?3
2645                )",
2646            params![path, commit_sha, worktree_id],
2647        )?;
2648        self.storage
2649            .connection()
2650            .execute("DELETE FROM parser_failures WHERE path = ?1", [&path])?;
2651        self.storage.connection().execute(
2652            "DELETE FROM chunk_fts
2653             WHERE rowid IN (
2654                 SELECT chunks.id FROM chunks
2655                 JOIN main.files ON main.files.id = chunks.file_id
2656                 WHERE main.files.path = ?1
2657                   AND main.files.commit_sha = ?2
2658                   AND main.files.worktree_id = ?3
2659             )",
2660            params![path, commit_sha, worktree_id],
2661        )?;
2662        self.storage.connection().execute(
2663            "DELETE FROM chunks
2664             WHERE file_id IN (
2665                SELECT id FROM main.files
2666                WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2667             )",
2668            params![path, commit_sha, worktree_id],
2669        )?;
2670        self.storage.connection().execute(
2671            "DELETE FROM symbols
2672             WHERE file_id IN (
2673                SELECT id FROM main.files
2674                WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2675             )",
2676            params![path, commit_sha, worktree_id],
2677        )?;
2678        self.storage.connection().execute(
2679            "DELETE FROM main.files WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3",
2680            params![path, commit_sha, worktree_id],
2681        )?;
2682        self.mark_fts_dirty()?;
2683        Ok(())
2684    }
2685
2686    fn ensure_fts_fresh(&self) -> anyhow::Result<()> {
2687        let content_revision = self.content_revision()?;
2688        let fts_source_revision = self.meta("fts_source_revision")?;
2689        if !self.fts_dirty()? && fts_source_revision.as_deref() == Some(content_revision.as_str()) {
2690            return Ok(());
2691        }
2692        self.rebuild_fts()?;
2693        let refreshed_revision = self.meta("fts_source_revision")?;
2694        if refreshed_revision.as_deref() != Some(content_revision.as_str()) {
2695            anyhow::bail!(
2696                "FTS freshness invariant failed: content_revision={content_revision}, fts_source_revision={}",
2697                refreshed_revision.unwrap_or_else(|| "<missing>".to_string())
2698            );
2699        }
2700        Ok(())
2701    }
2702
2703    fn fts_dirty(&self) -> anyhow::Result<bool> {
2704        Ok(self.meta("fts_dirty")?.as_deref() == Some("true"))
2705    }
2706
2707    fn file_row(&self, path: &Path) -> anyhow::Result<FileRow> {
2708        self.storage
2709            .connection()
2710            .query_row(
2711                "SELECT language, kind FROM files WHERE path = ?1",
2712                [path_string(path)],
2713                |row| {
2714                    let language: String = row.get(0)?;
2715                    let kind: String = row.get(1)?;
2716                    Ok((language, kind))
2717                },
2718            )
2719            .map_err(Into::into)
2720            .and_then(|(language, kind)| {
2721                Ok(FileRow { language: language.parse()?, kind: kind.parse()? })
2722            })
2723    }
2724
2725    fn graph_reindex_files(&self) -> anyhow::Result<Vec<GraphReindexFile>> {
2726        let mut stmt = self
2727            .storage
2728            .connection()
2729            .prepare("SELECT id, path, language, kind FROM files ORDER BY path")?;
2730        let rows = stmt.query_map([], |row| {
2731            let language: String = row.get(2)?;
2732            let kind: String = row.get(3)?;
2733            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?, language, kind))
2734        })?;
2735        let mut files = Vec::new();
2736        for row in rows {
2737            let (id, path, language, kind) = row?;
2738            files.push(GraphReindexFile {
2739                id,
2740                path,
2741                language: language.parse()?,
2742                kind: kind.parse()?,
2743            });
2744        }
2745        Ok(files)
2746    }
2747
2748    fn indexed_files(&self) -> anyhow::Result<Vec<IndexedFile>> {
2749        let mut stmt =
2750            self.storage.connection().prepare("SELECT path, sha256 FROM files ORDER BY path")?;
2751        let rows =
2752            stmt.query_map([], |row| Ok(IndexedFile { path: row.get(0)?, sha256: row.get(1)? }))?;
2753        let mut files = Vec::new();
2754        for row in rows {
2755            files.push(row?);
2756        }
2757        Ok(files)
2758    }
2759
2760    fn indexed_file_count(&self) -> anyhow::Result<usize> {
2761        let count =
2762            self.storage
2763                .connection()
2764                .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2765        Ok(usize::try_from(count).unwrap_or(usize::MAX))
2766    }
2767
2768    fn content_revision(&self) -> anyhow::Result<String> {
2769        let value = self.storage.connection().query_row(
2770            "SELECT COALESCE(string_agg(path || ':' || sha256, ',' ORDER BY path), '') FROM files",
2771            [],
2772            |row| row.get::<_, String>(0),
2773        )?;
2774        Ok(hex_sha256(value.as_bytes()))
2775    }
2776}
2777
2778#[derive(Debug)]
2779struct FileRow {
2780    language: Language,
2781    kind: TargetKind,
2782}
2783
2784#[derive(Debug)]
2785struct GraphReindexFile {
2786    id: i64,
2787    path: String,
2788    language: Language,
2789    kind: TargetKind,
2790}
2791
/// Raw (unparsed) columns of a `files` row looked up by path.
#[derive(Debug)]
struct GraphPathRow {
    /// Value of the `language` column.
    language: String,
    /// Value of the `sha256` column.
    sha256: String,
    /// Value of the `indexed_revision` column.
    indexed_revision: String,
}
2798
2799fn rank_docs_for_symbol(symbol: &crate::query::symbol::SymbolHit, hits: &mut [SearchHit]) {
2800    let source_module = module_stem(&symbol.path);
2801    let symbol_name = symbol.name.to_ascii_lowercase();
2802    let qualified_name = symbol.qualified_name.to_ascii_lowercase();
2803    hits.sort_by(|a, b| {
2804        let a_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, a);
2805        let b_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, b);
2806        a_rank
2807            .cmp(&b_rank)
2808            .then_with(|| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal))
2809            .then_with(|| a.path.cmp(&b.path))
2810            .then_with(|| a.start_line.cmp(&b.start_line))
2811    });
2812    for (idx, hit) in hits.iter_mut().enumerate() {
2813        hit.score = (10_000usize.saturating_sub(idx)) as f64;
2814    }
2815}
2816
2817fn docs_locality_rank(
2818    symbol: &crate::query::symbol::SymbolHit,
2819    source_module: &str,
2820    symbol_name: &str,
2821    qualified_name: &str,
2822    hit: &SearchHit,
2823) -> u8 {
2824    let path = hit.path.to_ascii_lowercase();
2825    let summary = hit.summary.to_ascii_lowercase();
2826    let hit_symbol = hit.symbol_path.as_deref().unwrap_or_default().to_ascii_lowercase();
2827    if hit.path == symbol.path && hit_symbol == symbol.qualified_name.to_ascii_lowercase() {
2828        return 0;
2829    }
2830    if hit.path == symbol.path {
2831        return 1;
2832    }
2833    if !source_module.is_empty()
2834        && path.contains(source_module)
2835        && (summary.contains(symbol_name) || hit_symbol.contains(symbol_name))
2836    {
2837        return 2;
2838    }
2839    if summary.contains(qualified_name) || hit_symbol.contains(qualified_name) {
2840        return 3;
2841    }
2842    if summary.contains(symbol_name) || hit_symbol.contains(symbol_name) {
2843        return 4;
2844    }
2845    if !source_module.is_empty() && path.contains(source_module) {
2846        return 5;
2847    }
2848    9
2849}
2850
/// Returns the ASCII-lowercased file stem of `path` (the final component
/// without its last extension), or an empty string when there is no stem or it
/// is not valid UTF-8.
fn module_stem(path: &str) -> String {
    match Path::new(path).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_ascii_lowercase(),
        None => String::new(),
    }
}
2858
2859fn dedupe_search_hits(hits: &mut Vec<SearchHit>) {
2860    let mut seen = BTreeSet::new();
2861    hits.retain(|hit| seen.insert(hit.chunk_id));
2862}
2863
/// Collapses every run of whitespace in `text` to a single space (dropping
/// leading and trailing whitespace) and keeps at most the first 240
/// characters.
fn bounded_summary(text: &str) -> String {
    let words: Vec<&str> = text.split_whitespace().collect();
    let collapsed = words.join(" ");
    // Cut on a character boundary, not a byte offset.
    match collapsed.char_indices().nth(240) {
        Some((cut, _)) => collapsed[..cut].to_string(),
        None => collapsed,
    }
}
2867
/// Identity of a logical symbol: the combination of language, path, name,
/// qualified name and kind.
///
/// The derived ordering compares fields in declaration order, so the field
/// order below is significant.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct LogicalSymbolKey {
    /// Language name of the symbol's file.
    language: String,
    /// Path of the symbol's file.
    path: String,
    /// Unqualified symbol name.
    name: String,
    /// Qualified symbol name.
    qualified_name: String,
    /// Symbol kind, kept as a string.
    kind: String,
}
2876
2877impl LogicalSymbolKey {
2878    fn from(row: &LogicalSymbolMemberRow) -> Self {
2879        Self {
2880            language: row.language.clone(),
2881            path: row.path.clone(),
2882            name: row.name.clone(),
2883            qualified_name: row.qualified_name.clone(),
2884            kind: row.kind.clone(),
2885        }
2886    }
2887}
2888
/// One symbol occurrence that can be grouped under a `LogicalSymbolKey`.
#[derive(Debug, Clone)]
struct LogicalSymbolMemberRow {
    /// Identifier of the symbol (presumably the `symbols.id` value — confirm
    /// against the query that fills this row).
    symbol_id: i64,
    /// Path of the file containing the symbol.
    path: String,
    /// Language name of the file.
    language: String,
    /// Unqualified symbol name.
    name: String,
    /// Qualified symbol name.
    qualified_name: String,
    /// Symbol kind, kept as a string.
    kind: String,
    /// Signature text, when one is available.
    signature: Option<String>,
    /// First line of the symbol.
    start_line: i64,
    /// Last line of the symbol.
    end_line: i64,
}
2901
/// Maps an absolute byte offset to a line number, given a chunk of text and
/// where that chunk starts.
///
/// * `text` — the chunk's text.
/// * `chunk_start_byte` — absolute byte offset at which `text` begins.
/// * `chunk_start_line` — line number of the chunk's first line.
/// * `byte` — absolute byte offset to locate.
///
/// Offsets at or before the chunk start yield `chunk_start_line` clamped to at
/// least 1. Otherwise the result is `chunk_start_line` plus the number of
/// newline bytes preceding the offset within the chunk; offsets past the end
/// of `text` are clamped to its length.
///
/// Newlines are counted on the raw bytes, so an offset that falls inside a
/// multi-byte UTF-8 character is handled instead of panicking on a `str`
/// slice boundary.
fn symbol_line_for_byte(
    text: &str,
    chunk_start_byte: usize,
    chunk_start_line: i64,
    byte: usize,
) -> i64 {
    if byte <= chunk_start_byte {
        return chunk_start_line.max(1);
    }
    let local = byte.saturating_sub(chunk_start_byte).min(text.len());
    let newlines_before = text.as_bytes()[..local].iter().filter(|&&b| b == b'\n').count();
    chunk_start_line.saturating_add(i64::try_from(newlines_before).unwrap_or(0))
}
2915
2916fn graph_only_reason(edge: &crate::query::graph::GraphHop, current_line: Option<&str>) -> String {
2917    let Some(line) = current_line else {
2918        return "missing_current_source_line".to_string();
2919    };
2920    if edge
2921        .target_qualified_name
2922        .as_deref()
2923        .is_some_and(|qualified| !qualified.is_empty() && line.contains(qualified))
2924    {
2925        return "qualified_call_pattern_mismatch".to_string();
2926    }
2927    if edge.target.as_deref().is_some_and(|target| !target.is_empty() && line.contains(target)) {
2928        return "imported_or_unqualified_call".to_string();
2929    }
2930    if edge
2931        .evidence
2932        .as_deref()
2933        .is_some_and(|evidence| !evidence.is_empty() && line.contains(evidence.trim()))
2934    {
2935        return "regex_too_narrow".to_string();
2936    }
2937    "stale_or_overbroad_graph_edge".to_string()
2938}
2939
2940fn is_likely_false_positive_graph_only(
2941    edge: &crate::query::graph::GraphHop,
2942    graph_only: &crate::query::graph::GraphOnlyEdge,
2943) -> bool {
2944    if graph_only.likely_reason == "stale_or_overbroad_graph_edge" {
2945        return true;
2946    }
2947    edge.resolution == "target_name_fallback"
2948        || edge.confidence == "NameOnly"
2949        || edge.confidence == "Ambiguous"
2950        || !edge.verified_target_symbol
2951}
2952
2953fn classify_text_only_hit(
2954    path: &str,
2955    text: &str,
2956    parser_failure_paths: &BTreeSet<String>,
2957) -> &'static str {
2958    if parser_failure_paths.contains(path) {
2959        return "parser_failure";
2960    }
2961    if is_generated_path(path) {
2962        return "generated_text_mention";
2963    }
2964    let trimmed = text.trim_start();
2965    if is_comment_like_text(trimmed) {
2966        return "comment_text_mention";
2967    }
2968    if is_import_or_declaration_text(trimmed) {
2969        return "declaration_text_mention";
2970    }
2971    if is_test_like_path(path) && is_test_scaffolding_text(trimmed) {
2972        return "test_scaffolding_text_mention";
2973    }
2974    "parser_call_extraction"
2975}
2976
/// Returns `true` for the classification labels that point at a parser gap:
/// `parser_call_extraction` and `parser_failure`.
fn is_likely_parser_gap_kind(kind: &str) -> bool {
    const PARSER_GAP_KINDS: [&str; 2] = ["parser_call_extraction", "parser_failure"];
    PARSER_GAP_KINDS.contains(&kind)
}
2980
/// Heuristically detects generated files from their path: anything under a
/// `/generated/` or `/generated-web/` directory, or ending in `.d.ts`
/// (which includes `_bg.wasm.d.ts`).
fn is_generated_path(path: &str) -> bool {
    const GENERATED_DIRS: [&str; 2] = ["/generated/", "/generated-web/"];
    const GENERATED_SUFFIXES: [&str; 2] = [".d.ts", "_bg.wasm.d.ts"];
    let inside_generated_dir = GENERATED_DIRS.iter().any(|dir| path.contains(*dir));
    let has_generated_suffix = GENERATED_SUFFIXES.iter().any(|suffix| path.ends_with(*suffix));
    inside_generated_dir || has_generated_suffix
}
2987
/// Returns `true` when `text` begins with a comment-style marker: `//`, `/*`,
/// `*`, `*/` or `#`. The caller is expected to have stripped leading
/// whitespace already.
fn is_comment_like_text(text: &str) -> bool {
    const COMMENT_OPENERS: [&str; 5] = ["//", "/*", "*", "*/", "#"];
    COMMENT_OPENERS.iter().any(|opener| text.starts_with(*opener))
}
2995
/// Returns `true` when `text` starts with an import or type-declaration
/// keyword (`import`, `export type`, `export interface`, `type`, `interface`,
/// `declare`), each followed by a space.
fn is_import_or_declaration_text(text: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 6] =
        ["import ", "export type ", "export interface ", "type ", "interface ", "declare "];
    DECLARATION_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
3004
/// Returns `true` when `text` contains any substring typical of test
/// scaffolding: `.mock`, `jest.`, `jest<`, `expect(`, `toHaveBeen`,
/// `describe(`, `it(` or `test(`.
///
/// These are plain substring checks, so e.g. `it(` also matches inside
/// `submit(`.
fn is_test_scaffolding_text(text: &str) -> bool {
    const SCAFFOLDING_MARKERS: [&str; 8] =
        [".mock", "jest.", "jest<", "expect(", "toHaveBeen", "describe(", "it(", "test("];
    SCAFFOLDING_MARKERS.iter().any(|marker| text.contains(*marker))
}
3015
3016fn recommended_graph_text_fallback(
3017    parser_gaps: &[crate::query::graph::TextOnlyHit],
3018    graph_only_edges: &[crate::query::graph::GraphOnlyEdge],
3019) -> String {
3020    match (parser_gaps.is_empty(), graph_only_edges.is_empty()) {
3021        (false, false) => "both",
3022        (false, true) => "text",
3023        (true, false) => "graph",
3024        (true, true) => "none",
3025    }
3026    .to_string()
3027}
3028
/// Describes how `pattern` relates to `symbol_name`.
///
/// * `"regex"` — `symbol_name` is empty, or the pattern does not contain it and
///   has none of the markers below.
/// * `"identifier_or_call"` — the pattern contains `\b`, `\W`, `[^`, or the
///   symbol name followed by an escaped or plain opening parenthesis.
/// * `"substring_identifier"` — the pattern merely contains the symbol name.
fn compare_pattern_match_mode(pattern: &str, symbol_name: &str) -> String {
    let mode = if symbol_name.is_empty() {
        "regex"
    } else {
        let boundary_markers = ["\\b", "\\W", "[^"];
        let call_forms = [format!("{symbol_name}\\("), format!("{symbol_name}(")];
        let has_boundary = boundary_markers.iter().any(|marker| pattern.contains(*marker));
        let has_call = call_forms.iter().any(|call| pattern.contains(call.as_str()));
        if has_boundary || has_call {
            "identifier_or_call"
        } else if pattern.contains(symbol_name) {
            "substring_identifier"
        } else {
            "regex"
        }
    };
    mode.to_string()
}
3048
/// Heuristically detects test files from their path (case-insensitively).
///
/// A path is test-like when any of its directory components is `test`,
/// `tests` or `__tests__`, or when the file name ends in `_test.rs`,
/// `.test.ts`, `.test.tsx`, `.spec.ts` or `.spec.tsx`.
///
/// Directory components are compared individually, so a test directory at the
/// very start of a relative path (`tests/integration.rs`) is recognised as
/// well as a nested one (`crates/core/tests/a.rs`). The final component is
/// never treated as a directory.
fn is_test_like_path(path: &str) -> bool {
    const TEST_DIRS: [&str; 3] = ["test", "tests", "__tests__"];
    const TEST_SUFFIXES: [&str; 5] =
        ["_test.rs", ".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx"];

    let lower = path.to_ascii_lowercase();
    // Skip the last component: it is the file name, not a directory.
    let in_test_dir =
        lower.split('/').rev().skip(1).any(|component| TEST_DIRS.contains(&component));
    in_test_dir || TEST_SUFFIXES.iter().any(|suffix| lower.ends_with(*suffix))
}
3060
/// Path and stored content hash of one row in the `files` table.
#[derive(Debug)]
struct IndexedFile {
    /// Value of the row's `path` column.
    path: String,
    /// Value of the row's `sha256` column.
    sha256: String,
}
3066
3067#[derive(Debug, Clone)]
3068struct IndexFile {
3069    full_path: PathBuf,
3070    relative_path: PathBuf,
3071    language: Language,
3072    kind: TargetKind,
3073    commit_sha: String,
3074    worktree_id: String,
3075}
3076
/// Scope of a file record, expressed as a commit SHA and a worktree id.
///
/// The constructors on this type fill in one of the two and leave the other
/// empty.
#[derive(Debug, Clone)]
struct FileScope {
    /// Commit SHA of the scope; empty for a worktree scope.
    commit_sha: String,
    /// Worktree id of the scope; empty for a commit scope.
    worktree_id: String,
}
3082
3083impl FileScope {
3084    fn commit(commit_sha: String) -> Self {
3085        Self { commit_sha, worktree_id: String::new() }
3086    }
3087
3088    fn worktree(worktree_id: String) -> Self {
3089        Self { commit_sha: String::new(), worktree_id }
3090    }
3091}
3092
3093#[derive(Debug)]
3094struct PreparedIndexFile {
3095    file: IndexFile,
3096    prepared: anyhow::Result<PreparedIndexContent>,
3097}
3098
3099#[derive(Debug)]
3100struct PreparedIndexContent {
3101    modified_at_ms: i64,
3102    text: String,
3103    sha256: String,
3104    chunks: Vec<Chunk>,
3105    symbols: Vec<Symbol>,
3106    parser_failure: Option<String>,
3107}
3108
3109#[derive(Debug)]
3110struct DiscoveryPlan {
3111    files: Vec<IndexFile>,
3112    deleted: BTreeSet<PathBuf>,
3113    unindexed: Vec<IndexFile>,
3114    changed: Vec<PathBuf>,
3115    discovered_files: usize,
3116    indexed_files: usize,
3117}
3118
/// Root-relative paths reported as changed or deleted.
///
/// Both sets are empty by default.
#[derive(Debug, Default)]
struct GitChangedPaths {
    /// Paths reported as changed; each is joined onto the configured root when
    /// changed files are collected.
    changed: BTreeSet<PathBuf>,
    /// Paths reported as deleted.
    deleted: BTreeSet<PathBuf>,
}
3124
3125fn collect_index_files(config: &Config) -> anyhow::Result<Vec<IndexFile>> {
3126    let mut targets = config.targets.iter().collect::<Vec<_>>();
3127    targets.sort_by_key(|target| match target.kind {
3128        TargetKind::Generated => 0,
3129        TargetKind::Tests => 1,
3130        TargetKind::Docs => 2,
3131        TargetKind::Source => 3,
3132    });
3133    let mut seen = BTreeSet::new();
3134    let mut files = Vec::new();
3135
3136    for target in targets {
3137        for file in walker::walk_target(&config.root, target)? {
3138            let relative_path = file.strip_prefix(&config.root)?.to_path_buf();
3139            if !seen.insert(relative_path.clone()) {
3140                continue;
3141            }
3142            files.push(IndexFile {
3143                full_path: file,
3144                relative_path,
3145                language: target.language,
3146                kind: target.kind,
3147                commit_sha: String::new(),
3148                worktree_id: String::new(),
3149            });
3150        }
3151    }
3152
3153    Ok(files)
3154}
3155
3156fn collect_changed_index_files(
3157    config: &Config,
3158    changes: &GitChangedPaths,
3159) -> anyhow::Result<Vec<IndexFile>> {
3160    let mut files = Vec::new();
3161    for relative_path in &changes.changed {
3162        let full_path = config.root.join(relative_path);
3163        if !full_path.is_file() {
3164            continue;
3165        }
3166        let Some((language, kind)) = target_for_path(config, relative_path) else {
3167            continue;
3168        };
3169        files.push(IndexFile {
3170            full_path,
3171            relative_path: relative_path.clone(),
3172            language,
3173            kind,
3174            commit_sha: String::new(),
3175            worktree_id: String::new(),
3176        });
3177    }
3178    Ok(files)
3179}
3180
3181fn spawn_git_history_prepare(
3182    root: &Path,
3183) -> JoinHandle<anyhow::Result<git_history::PreparedGitHistory>> {
3184    let root = root.to_path_buf();
3185    thread::spawn(move || git_history::prepare(&root))
3186}
3187
3188fn join_git_history_prepare(
3189    handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
3190) -> anyhow::Result<git_history::PreparedGitHistory> {
3191    handle.join().map_err(|_| anyhow::anyhow!("git history preparation panicked"))?
3192}
3193
3194fn prepare_index_file(file: &IndexFile) -> PreparedIndexFile {
3195    PreparedIndexFile { file: file.clone(), prepared: prepare_index_content(file) }
3196}
3197
3198fn prepare_files_with_progress<F>(
3199    files: &[IndexFile],
3200    progress: &mut F,
3201) -> anyhow::Result<Vec<PreparedIndexFile>>
3202where
3203    F: FnMut(IndexProgress),
3204{
3205    #[derive(Debug)]
3206    struct PreparedProgress {
3207        current: usize,
3208        total: usize,
3209        path: PathBuf,
3210        language: Language,
3211        kind: TargetKind,
3212    }
3213
3214    let total = files.len();
3215    let prepared = thread::scope(|scope| {
3216        let (tx, rx) = mpsc::channel();
3217        let completed = AtomicUsize::new(0);
3218        let handle = scope.spawn(move || {
3219            files
3220                .par_iter()
3221                .map(|file| {
3222                    let prepared = prepare_index_file(file);
3223                    let current = completed.fetch_add(1, Ordering::Relaxed) + 1;
3224                    if should_report_file_progress(current, total) {
3225                        let _ = tx.send(PreparedProgress {
3226                            current,
3227                            total,
3228                            path: file.relative_path.clone(),
3229                            language: file.language,
3230                            kind: file.kind,
3231                        });
3232                    }
3233                    prepared
3234                })
3235                .collect::<Vec<_>>()
3236        });
3237
3238        for event in rx {
3239            progress(IndexProgress::PreparingFile {
3240                current: event.current,
3241                total: event.total,
3242                path: event.path,
3243                language: event.language,
3244                kind: event.kind,
3245            });
3246        }
3247
3248        handle.join().map_err(|_| anyhow::anyhow!("parallel file preparation panicked"))
3249    })?;
3250    Ok(prepared)
3251}
3252
/// Decides whether finishing file number `current` out of `total` deserves a
/// progress report.
///
/// Reports are emitted for the first file, the last file, and whenever the
/// completed share crosses into a new tenth of the total. Nothing is reported
/// when `total` is zero.
fn should_report_file_progress(current: usize, total: usize) -> bool {
    if total == 0 {
        return false;
    }
    // Which tenth of the work a given completed count falls into.
    let tenth_of = |completed: usize| completed.saturating_mul(10) / total;
    let is_first = current == 1;
    let is_last = current == total;
    let crossed_tenth = tenth_of(current) != tenth_of(current.saturating_sub(1));
    is_first || is_last || crossed_tenth
}
3262
3263fn prepare_index_content(file: &IndexFile) -> anyhow::Result<PreparedIndexContent> {
3264    let text = fs::read_to_string(&file.full_path)?;
3265    let modified_at_ms = file_metadata_ms(&file.full_path)?;
3266    let sha256 = hex_sha256(text.as_bytes());
3267    let parser_failure =
3268        if file.language != Language::Markdown && file.kind != TargetKind::Generated {
3269            if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3270                None
3271            } else {
3272                parser::parse_error(&file.relative_path, file.language, &text)
3273                    .unwrap_or_else(|err| Some(err.to_string()))
3274            }
3275        } else {
3276            None
3277        };
3278    let chunks = if file.kind == TargetKind::Generated {
3279        chunker::generated_chunks_for_file(&file.relative_path, &text)
3280    } else {
3281        chunker::chunks_for_file(&file.relative_path, file.language, &text)
3282    };
3283    let symbols =
3284        if file.kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3285            Vec::new()
3286        } else {
3287            symbols::symbols_for_file(&file.relative_path, file.language, &text)
3288        };
3289    Ok(PreparedIndexContent { modified_at_ms, text, sha256, chunks, symbols, parser_failure })
3290}
3291
3292fn discovery_plan(conn: &rusqlite::Connection, config: &Config) -> anyhow::Result<DiscoveryPlan> {
3293    let discovered = collect_index_files(config)?;
3294    let mut indexed = indexed_file_map(conn)?;
3295    let mut current_paths = BTreeSet::new();
3296    let mut files = Vec::new();
3297    let mut unindexed = Vec::new();
3298    let mut changed = Vec::new();
3299    let discovered_files = discovered.len();
3300    let hashed = discovered
3301        .par_iter()
3302        .map(|file| -> anyhow::Result<(IndexFile, String)> {
3303            let text = fs::read(&file.full_path)?;
3304            Ok((file.clone(), hex_sha256(&text)))
3305        })
3306        .collect::<Vec<_>>();
3307
3308    for hashed_file in hashed {
3309        let (file, current_hash) = hashed_file?;
3310        let relative = path_string(&file.relative_path);
3311        current_paths.insert(file.relative_path.clone());
3312        let Some(indexed_hash) = indexed.remove(&relative) else {
3313            unindexed.push(file.clone());
3314            files.push(file);
3315            continue;
3316        };
3317        if current_hash != indexed_hash {
3318            changed.push(file.relative_path.clone());
3319            files.push(file);
3320        }
3321    }
3322
3323    let deleted = indexed
3324        .into_keys()
3325        .map(PathBuf::from)
3326        .filter(|path| !current_paths.contains(path))
3327        .collect::<BTreeSet<_>>();
3328
3329    Ok(DiscoveryPlan {
3330        discovered_files,
3331        indexed_files: current_paths
3332            .len()
3333            .saturating_add(deleted.len())
3334            .saturating_sub(unindexed.len()),
3335        files,
3336        deleted,
3337        unindexed,
3338        changed,
3339    })
3340}
3341
3342fn indexed_file_map(conn: &rusqlite::Connection) -> anyhow::Result<BTreeMap<String, String>> {
3343    let mut stmt = conn.prepare("SELECT path, sha256 FROM files ORDER BY path")?;
3344    let rows =
3345        stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?;
3346    let mut files = BTreeMap::new();
3347    for row in rows {
3348        let (path, sha256) = row?;
3349        files.insert(path, sha256);
3350    }
3351    Ok(files)
3352}
3353
3354fn target_for_path(config: &Config, relative_path: &Path) -> Option<(Language, TargetKind)> {
3355    let relative = path_string(relative_path);
3356    let language = Language::from_path(relative_path)?;
3357    let mut targets = config.targets.iter().collect::<Vec<_>>();
3358    targets.sort_by_key(|target| match target.kind {
3359        TargetKind::Generated => 0,
3360        TargetKind::Tests => 1,
3361        TargetKind::Docs => 2,
3362        TargetKind::Source => 3,
3363    });
3364    targets.into_iter().find_map(|target| {
3365        if target.language != language {
3366            return None;
3367        }
3368        if !target.directories.iter().any(|directory| {
3369            directory.as_os_str().is_empty()
3370                || directory == Path::new(".")
3371                || relative_path.starts_with(directory)
3372        }) {
3373            return None;
3374        }
3375        if target.exclude.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3376            return None;
3377        }
3378        if !target.include.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3379            return None;
3380        }
3381        Some((target.language, target.kind))
3382    })
3383}
3384
3385fn git_changed_paths(root: &Path) -> anyhow::Result<GitChangedPaths> {
3386    let repo = gix::discover(root)?;
3387    let worktree_root = repo
3388        .workdir()
3389        .ok_or_else(|| anyhow::anyhow!("git repository has no worktree"))?
3390        .to_path_buf();
3391    let pathspec = config_root_pathspec(&worktree_root, root);
3392    let mut paths = GitChangedPaths::default();
3393
3394    for item in repo
3395        .status(gix::progress::Discard)?
3396        .untracked_files(UntrackedFiles::Files)
3397        .tree_index_track_renames(tree_index::TrackRenames::Disabled)
3398        .into_iter([pathspec])?
3399    {
3400        let item = item?;
3401        let Some(path) = repo_relative_path_to_config_path(&worktree_root, root, item.location())
3402        else {
3403            continue;
3404        };
3405        if root.join(&path).exists() {
3406            if !paths.deleted.contains(&path) {
3407                paths.changed.insert(path);
3408            }
3409        } else {
3410            paths.changed.remove(&path);
3411            paths.deleted.insert(path);
3412        }
3413    }
3414
3415    Ok(paths)
3416}
3417
3418fn repo_relative_path_to_config_path(
3419    worktree_root: &Path,
3420    config_root: &Path,
3421    repo_relative_path: &gix::bstr::BStr,
3422) -> Option<PathBuf> {
3423    let path = PathBuf::from(repo_relative_path.to_str_lossy().as_ref());
3424    worktree_root.join(path).strip_prefix(config_root).ok().map(Path::to_path_buf)
3425}
3426
3427fn config_root_pathspec(worktree_root: &Path, config_root: &Path) -> BString {
3428    let relative = config_root.strip_prefix(worktree_root).unwrap_or_else(|_| Path::new(""));
3429    let relative = path_string(relative);
3430    if relative.is_empty() || relative == "." {
3431        BString::from("*")
3432    } else {
3433        BString::from(format!("{relative}/**"))
3434    }
3435}
3436
/// Tests a `/`-separated relative path against one of the simplified glob forms.
///
/// Supported forms, checked in this order:
/// - `**/*.ext` — the path ends with `.ext`;
/// - `dir/**` — the path is `dir` itself or lies below `dir/`;
/// - anything else — the path equals the pattern, or contains the pattern with its
///   leading and trailing `*` characters removed.
///
/// The `dir/**` form compares whole path segments, so `src/**` matches `src/lib.rs` but
/// not `src_generated/lib.rs`.
fn matches_simple_pattern(path: &str, pattern: &str) -> bool {
    if let Some(extension) = pattern.strip_prefix("**/*.") {
        return path.ends_with(&format!(".{extension}"));
    }
    if let Some(prefix) = pattern.strip_suffix("/**") {
        // A bare `/**` has nothing to anchor on and keeps matching every path.
        if prefix.is_empty() {
            return true;
        }
        return match path.strip_prefix(prefix) {
            // The remainder must start a new segment (or be empty); otherwise the prefix
            // merely happens to be the beginning of a longer directory name.
            Some(rest) => rest.is_empty() || prefix.ends_with('/') || rest.starts_with('/'),
            None => false,
        };
    }
    path == pattern || path.contains(pattern.trim_matches('*'))
}
3446
3447fn meta_for(conn: &rusqlite::Connection, key: &str) -> anyhow::Result<Option<String>> {
3448    Ok(conn
3449        .query_row("SELECT value FROM index_meta WHERE key = ?1", [key], |row| row.get(0))
3450        .optional()?)
3451}
3452
/// Runs `git` with `args` inside `root` and returns its trimmed standard output.
///
/// Returns `None` when the process cannot be started or exits unsuccessfully. Output
/// that is not valid UTF-8 is converted lossily.
fn git_output(root: &Path, args: &[&str]) -> Option<String> {
    let mut command = Command::new("git");
    command.args(args).current_dir(root);
    match command.output() {
        Ok(output) if output.status.success() => {
            Some(String::from_utf8_lossy(&output.stdout).trim().to_string())
        }
        _ => None,
    }
}
3460
3461fn resolve_git_context(root: &Path) -> (String, String) {
3462    let commit_sha =
3463        git_output(root, &["rev-parse", "HEAD"]).map(|s| s.trim().to_string()).unwrap_or_default();
3464    let worktree_id = root.to_string_lossy().trim_end_matches('/').to_string();
3465    (commit_sha, worktree_id)
3466}
3467
3468fn file_metadata_ms(path: &Path) -> anyhow::Result<i64> {
3469    let modified = fs::metadata(path)?.modified()?;
3470    Ok(duration_ms(modified.duration_since(UNIX_EPOCH)?))
3471}
3472
3473fn now_ms() -> i64 {
3474    duration_ms(SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default())
3475}
3476
/// Converts a duration to whole milliseconds, clamping to `i64::MAX` when the value does
/// not fit into an `i64`.
fn duration_ms(duration: std::time::Duration) -> i64 {
    match i64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => i64::MAX,
    }
}
3480
3481fn hex_sha256(bytes: &[u8]) -> String {
3482    let hash = Sha256::digest(bytes);
3483    let mut out = String::with_capacity(hash.len() * 2);
3484    for byte in hash {
3485        use std::fmt::Write as _;
3486        let _ = write!(out, "{byte:02x}");
3487    }
3488    out
3489}
3490
/// Renders a path as text with every backslash replaced by `/`.
///
/// Path data that is not valid Unicode is converted lossily.
fn path_string(path: &Path) -> String {
    path.to_string_lossy().chars().map(|ch| if ch == '\\' { '/' } else { ch }).collect()
}
3494
3495#[cfg(test)]
3496mod schema_bootstrap_tests {
3497    use std::sync::atomic::{AtomicU64, Ordering};
3498
3499    use super::*;
3500    use crate::config::ResolvedTarget;
3501
3502    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
3503
3504    #[test]
3505    fn rebuild_bootstraps_sqlite_schema_for_empty_target_root() {
3506        let root = unique_temp_root();
3507        let _ = fs::remove_dir_all(&root);
3508        let docs = root.join("docs");
3509        fs::create_dir_all(&docs).unwrap();
3510
3511        let config = Config {
3512            root: root.clone(),
3513            database: root.join(".rag-rat/index.sqlite"),
3514            targets: vec![ResolvedTarget {
3515                name: "markdown".to_string(),
3516                language: Language::Markdown,
3517                directories: vec![PathBuf::from("docs")],
3518                include: vec!["**/*.md".to_string()],
3519                exclude: Vec::new(),
3520                kind: TargetKind::Docs,
3521            }],
3522            local_ai: Default::default(),
3523        };
3524
3525        let db = IndexDatabase::rebuild(&config).unwrap();
3526        assert!(config.database.exists());
3527        assert_eq!(table_count(&db, "files"), 1);
3528        assert_eq!(table_count(&db, "chunks"), 1);
3529        assert_eq!(table_count(&db, "symbols"), 1);
3530        assert_eq!(table_count(&db, "parser_failures"), 1);
3531        assert_eq!(table_count(&db, "index_meta"), 1);
3532        assert_eq!(table_count(&db, "chunk_fts"), 1);
3533        assert_eq!(table_count(&db, "git_commits"), 1);
3534        assert_eq!(table_count(&db, "git_file_changes"), 1);
3535        assert_eq!(table_count(&db, "git_chunk_blame"), 1);
3536        assert_eq!(table_count(&db, "commit_fts"), 1);
3537        assert_eq!(table_count(&db, "ai_models"), 1);
3538        assert_eq!(table_count(&db, "chunk_embeddings"), 1);
3539        assert_eq!(table_count(&db, "chunk_summaries"), 1);
3540        assert_eq!(table_count(&db, "reconcile_meta"), 1);
3541        assert_eq!(table_count(&db, "reconcile_attempts"), 1);
3542        assert!(file_columns(&db).contains(&"indexed_revision".to_string()));
3543        assert_eq!(indexed_revision_count(&db), 0);
3544        assert!(chunk_columns(&db).contains(&"anchor_version".to_string()));
3545        assert!(chunk_columns(&db).contains(&"normalized_hash".to_string()));
3546        assert!(chunk_columns(&db).contains(&"start_boundary_hash".to_string()));
3547        assert!(chunk_columns(&db).contains(&"end_boundary_hash".to_string()));
3548        assert!(chunk_columns(&db).contains(&"source_revision".to_string()));
3549        let embedding_columns = table_columns(&db, "chunk_embeddings");
3550        assert!(embedding_columns.contains(&"model_version".to_string()));
3551        assert!(embedding_columns.contains(&"input_hash".to_string()));
3552        assert!(embedding_columns.contains(&"embedding_text_version".to_string()));
3553        assert!(embedding_columns.contains(&"embedding_policy".to_string()));
3554        assert!(embedding_columns.contains(&"embedding_priority".to_string()));
3555        assert!(embedding_columns.contains(&"input_chars".to_string()));
3556        assert!(embedding_columns.contains(&"input_truncated".to_string()));
3557        assert!(embedding_columns.contains(&"attempt_count".to_string()));
3558        assert!(embedding_columns.contains(&"next_retry_after_ms".to_string()));
3559        assert!(embedding_columns.contains(&"computed_at_ms".to_string()));
3560        let edge_columns = table_columns(&db, "edges");
3561        assert!(edge_columns.contains(&"source_start_line".to_string()));
3562        assert!(edge_columns.contains(&"source_end_line".to_string()));
3563        assert!(edge_columns.contains(&"source_start_byte".to_string()));
3564        assert!(edge_columns.contains(&"source_end_byte".to_string()));
3565        assert!(edge_columns.contains(&"target_start_line".to_string()));
3566        assert!(edge_columns.contains(&"target_end_line".to_string()));
3567        assert!(edge_columns.contains(&"target_qualified_name".to_string()));
3568        assert!(edge_columns.contains(&"evidence".to_string()));
3569        assert!(edge_columns.contains(&"receiver_hint".to_string()));
3570        assert!(edge_columns.contains(&"resolution".to_string()));
3571        let logical_columns = table_columns(&db, "logical_symbols");
3572        assert!(logical_columns.contains(&"qualified_name".to_string()));
3573        assert!(logical_columns.contains(&"variant_count".to_string()));
3574        let member_columns = table_columns(&db, "logical_symbol_members");
3575        assert!(member_columns.contains(&"symbol_id".to_string()));
3576        assert!(member_columns.contains(&"signature_hash".to_string()));
3577        let github_ref_sync_columns = table_columns(&db, "github_ref_sync");
3578        assert!(github_ref_sync_columns.contains(&"status".to_string()));
3579        assert!(github_ref_sync_columns.contains(&"last_error".to_string()));
3580        let symbol_fact_columns = table_columns(&db, "symbol_facts");
3581        assert!(symbol_fact_columns.contains(&"fact_kind".to_string()));
3582        assert!(symbol_fact_columns.contains(&"fact_value".to_string()));
3583        assert_eq!(
3584            db.status(&config.database).unwrap().schema.current_version,
3585            schema::LATEST_SCHEMA_VERSION
3586        );
3587
3588        fs::remove_dir_all(root).unwrap();
3589    }
3590
3591    #[test]
3592    fn rebuild_reports_file_preparation_progress() {
3593        let root = unique_temp_root();
3594        let _ = fs::remove_dir_all(&root);
3595        fs::create_dir_all(root.join("src")).unwrap();
3596        fs::write(root.join("src/lib.rs"), "pub fn exported() {}\n").unwrap();
3597
3598        let config = source_config(root.clone(), Language::Rust);
3599        let mut events = Vec::new();
3600        IndexDatabase::rebuild_with_progress(&config, |progress| events.push(progress)).unwrap();
3601
3602        assert!(
3603            events.iter().any(|event| matches!(event, IndexProgress::PreparingFile { .. })),
3604            "missing preparing progress event: {events:?}"
3605        );
3606        assert!(
3607            events.iter().any(|event| matches!(event, IndexProgress::IndexingFile { .. })),
3608            "missing indexing progress event: {events:?}"
3609        );
3610
3611        fs::remove_dir_all(root).unwrap();
3612    }
3613
3614    #[test]
3615    fn file_progress_reports_first_final_and_decile_boundaries() {
3616        let reported = (1..=100)
3617            .filter(|current| should_report_file_progress(*current, 100))
3618            .collect::<Vec<_>>();
3619        assert_eq!(reported, vec![1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]);
3620    }
3621
3622    #[test]
3623    fn compatible_open_requires_recorded_schema_version() {
3624        let root = unique_temp_root();
3625        let _ = fs::remove_dir_all(&root);
3626        fs::create_dir_all(root.join(".rag-rat")).unwrap();
3627        let database = root.join(".rag-rat/index.sqlite");
3628        IndexDatabase::migrate(&database).unwrap();
3629        let conn = rusqlite::Connection::open(&database).unwrap();
3630        conn.execute_batch("DROP TABLE schema_version;").unwrap();
3631        drop(conn);
3632
3633        let status = IndexDatabase::migration_check(&database).unwrap();
3634        assert_eq!(status.state, schema::SchemaState::Older);
3635        let err = IndexDatabase::open(&database).unwrap_err().to_string();
3636        assert!(err.contains("run `rag-rat migrate`"), "{err}");
3637
3638        let migrated = IndexDatabase::migrate(&database).unwrap();
3639        assert_eq!(migrated.state, schema::SchemaState::Compatible);
3640        IndexDatabase::open(&database).unwrap();
3641
3642        fs::remove_dir_all(root).unwrap();
3643    }
3644
3645    #[test]
3646    fn migrate_adds_edge_name_columns_before_indexing_them() {
3647        let root = unique_temp_root();
3648        let _ = fs::remove_dir_all(&root);
3649        fs::create_dir_all(root.join(".rag-rat")).unwrap();
3650        let database = root.join(".rag-rat/index.sqlite");
3651        let conn = rusqlite::Connection::open(&database).unwrap();
3652        conn.execute_batch(
3653            "
3654            CREATE TABLE files(
3655                id INTEGER PRIMARY KEY AUTOINCREMENT,
3656                path TEXT NOT NULL UNIQUE,
3657                language TEXT NOT NULL,
3658                kind TEXT NOT NULL,
3659                sha256 TEXT NOT NULL,
3660                modified_at_ms INTEGER NOT NULL,
3661                generated INTEGER NOT NULL DEFAULT 0,
3662                indexed_at_ms INTEGER NOT NULL
3663            );
3664            CREATE TABLE chunks(
3665                id INTEGER PRIMARY KEY AUTOINCREMENT,
3666                file_id INTEGER NOT NULL,
3667                chunk_kind TEXT NOT NULL,
3668                symbol_path TEXT,
3669                start_byte INTEGER NOT NULL,
3670                end_byte INTEGER NOT NULL,
3671                start_line INTEGER NOT NULL,
3672                end_line INTEGER NOT NULL,
3673                text TEXT NOT NULL,
3674                text_hash TEXT NOT NULL
3675            );
3676            CREATE TABLE symbols(
3677                id INTEGER PRIMARY KEY AUTOINCREMENT,
3678                file_id INTEGER NOT NULL,
3679                language TEXT NOT NULL,
3680                name TEXT NOT NULL,
3681                qualified_name TEXT NOT NULL,
3682                kind TEXT NOT NULL,
3683                start_byte INTEGER NOT NULL,
3684                end_byte INTEGER NOT NULL,
3685                signature TEXT,
3686                docs TEXT
3687            );
3688            CREATE TABLE edges(
3689                id INTEGER PRIMARY KEY AUTOINCREMENT,
3690                from_symbol_id INTEGER,
3691                to_symbol_id INTEGER,
3692                edge_kind TEXT NOT NULL,
3693                confidence TEXT NOT NULL
3694            );
3695            ",
3696        )
3697        .unwrap();
3698        drop(conn);
3699
3700        let migrated = IndexDatabase::migrate(&database).unwrap();
3701        assert_eq!(migrated.state, schema::SchemaState::Compatible);
3702        let db = IndexDatabase::open(&database).unwrap();
3703        let columns = table_columns(&db, "edges");
3704        assert!(columns.contains(&"from_name".to_string()));
3705        assert!(columns.contains(&"to_name".to_string()));
3706        assert!(columns.contains(&"source_start_line".to_string()));
3707        assert!(columns.contains(&"source_end_line".to_string()));
3708        assert!(columns.contains(&"source_start_byte".to_string()));
3709        assert!(columns.contains(&"source_end_byte".to_string()));
3710        assert!(columns.contains(&"target_start_line".to_string()));
3711        assert!(columns.contains(&"target_end_line".to_string()));
3712        assert_eq!(table_count(&db, "idx_edges_from_name"), 1);
3713        assert_eq!(table_count(&db, "idx_edges_to_name"), 1);
3714
3715        fs::remove_dir_all(root).unwrap();
3716    }
3717
3718    #[test]
3719    fn migrate_preserves_github_papertrail_cache() {
3720        let (root, config) =
3721            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
3722        let db = IndexDatabase::rebuild(&config).unwrap();
3723        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
3724            .unwrap();
3725        assert_eq!(row_count(&db, "github_refs"), 1);
3726        assert_eq!(row_count(&db, "github_issues"), 1);
3727        assert_eq!(row_count(&db, "github_comments"), 1);
3728        assert_eq!(row_count(&db, "github_pull_requests"), 1);
3729        assert_eq!(row_count(&db, "github_reviews"), 1);
3730        assert_eq!(row_count(&db, "github_review_comments"), 1);
3731        assert_eq!(row_count(&db, "github_fts"), 5);
3732        db.storage
3733            .connection()
3734            .execute("DELETE FROM schema_version WHERE id = ?1", ["010_symbol_facts"])
3735            .unwrap();
3736        drop(db);
3737
3738        let migrated = IndexDatabase::migrate(&config.database).unwrap();
3739        assert_eq!(migrated.state, schema::SchemaState::Compatible);
3740        let db = IndexDatabase::open(&config.database).unwrap();
3741        assert_eq!(row_count(&db, "github_refs"), 1);
3742        assert_eq!(row_count(&db, "github_issues"), 1);
3743        assert_eq!(row_count(&db, "github_comments"), 1);
3744        assert_eq!(row_count(&db, "github_pull_requests"), 1);
3745        assert_eq!(row_count(&db, "github_reviews"), 1);
3746        assert_eq!(row_count(&db, "github_review_comments"), 1);
3747        assert_eq!(row_count(&db, "github_fts"), 5);
3748        let hits = db.github_issue_search("sqlite", 10).unwrap();
3749        assert_eq!(hits.len(), 1);
3750        assert_eq!(hits[0].number, 42);
3751
3752        fs::remove_dir_all(root).unwrap();
3753    }
3754
3755    #[test]
3756    fn full_rebuild_preserves_github_papertrail_cache() {
3757        let (root, config) =
3758            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
3759        let db = IndexDatabase::rebuild(&config).unwrap();
3760        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
3761            .unwrap();
3762        assert_eq!(row_count(&db, "github_issues"), 1);
3763        assert_eq!(row_count(&db, "github_fts"), 5);
3764        drop(db);
3765
3766        let db = IndexDatabase::rebuild(&config).unwrap();
3767
3768        assert_eq!(row_count(&db, "github_refs"), 1);
3769        assert_eq!(row_count(&db, "github_issues"), 1);
3770        assert_eq!(row_count(&db, "github_comments"), 1);
3771        assert_eq!(row_count(&db, "github_pull_requests"), 1);
3772        assert_eq!(row_count(&db, "github_reviews"), 1);
3773        assert_eq!(row_count(&db, "github_review_comments"), 1);
3774        assert_eq!(row_count(&db, "github_ref_sync"), 1);
3775        assert_eq!(row_count(&db, "github_fts"), 5);
3776        let hits = db.github_issue_search("sqlite", 10).unwrap();
3777        assert_eq!(hits.len(), 1);
3778        assert_eq!(hits[0].number, 42);
3779
3780        fs::remove_dir_all(root).unwrap();
3781    }
3782
3783    #[test]
3784    fn full_rebuild_preserves_installed_model_manifest() {
3785        let (root, config) = markdown_config("alpha token with enough detail for embeddings\n");
3786        let db = IndexDatabase::rebuild(&config).unwrap();
3787        db.install_model(ai::HASH_MODEL_ID).unwrap();
3788        let before = db.local_ai_status().unwrap();
3789        assert_eq!(before.embedding.model_id, ai::HASH_MODEL_ID);
3790        assert!(before.embedding.installed);
3791        drop(db);
3792
3793        let db = IndexDatabase::rebuild(&config).unwrap();
3794
3795        let after = db.local_ai_status().unwrap();
3796        assert_eq!(after.embedding.model_id, ai::HASH_MODEL_ID);
3797        assert!(after.embedding.installed);
3798        assert_eq!(after.embedding.state, "Ready");
3799
3800        fs::remove_dir_all(root).unwrap();
3801    }
3802
3803    #[test]
3804    fn full_rebuild_preserves_other_worktree_contexts() {
3805        let root = unique_temp_root();
3806        let _ = fs::remove_dir_all(&root);
3807        fs::create_dir_all(root.join("src")).unwrap();
3808        fs::write(root.join("src/lib.rs"), "pub fn current_context() {}\n").unwrap();
3809        let config = source_config(root.clone(), Language::Rust);
3810        let db = IndexDatabase::rebuild(&config).unwrap();
3811        let other_file_id = db
3812            .storage
3813            .connection()
3814            .query_row(
3815                "
3816                INSERT INTO main.files(
3817                    path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms,
3818                    indexed_revision, commit_sha, worktree_id
3819                )
3820                VALUES ('src/other.rs', 'rust', 'source', 'other-sha', 0, 0, 1, 'other-sha', '', 'other-worktree')
3821                RETURNING id
3822                ",
3823                [],
3824                |row| row.get::<_, i64>(0),
3825            )
3826            .unwrap();
3827        let other_chunk_id = db
3828            .storage
3829            .connection()
3830            .query_row(
3831                "
3832                INSERT INTO main.chunks(
3833                    file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line,
3834                    text, text_hash, source_revision, anchor_version, normalized_hash,
3835                    start_boundary_hash, end_boundary_hash, start_context_hash, end_context_hash,
3836                    context_radius, embedding_policy, embedding_priority
3837                )
3838                VALUES (?1, 'symbol', 'other_context', 0, 12, 1, 1, 'other context', 'other-text',
3839                    'other-sha', 1, '', '', '', '', '', 2, 'Embed', 1)
3840                RETURNING id
3841                ",
3842                [other_file_id],
3843                |row| row.get::<_, i64>(0),
3844            )
3845            .unwrap();
3846        db.storage
3847            .connection()
3848            .execute(
3849                "
3850                INSERT INTO main.symbols(
3851                    file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs
3852                )
3853                VALUES (?1, 'rust', 'other_context', 'other_context', 'function', 0, 12, NULL, NULL)
3854                ",
3855                [other_file_id],
3856            )
3857            .unwrap();
3858        db.storage
3859            .connection()
3860            .execute(
3861                "INSERT INTO main.chunk_fts(rowid, text) VALUES (?1, 'other context')",
3862                [other_chunk_id],
3863            )
3864            .unwrap();
3865        drop(db);
3866
3867        let db = IndexDatabase::rebuild(&config).unwrap();
3868
3869        assert_eq!(
3870            db.storage
3871                .connection()
3872                .query_row(
3873                    "SELECT COUNT(*) FROM main.files WHERE worktree_id = 'other-worktree'",
3874                    [],
3875                    |row| row.get::<_, i64>(0)
3876                )
3877                .unwrap(),
3878            1
3879        );
3880        assert_eq!(
3881            db.storage
3882                .connection()
3883                .query_row(
3884                    "SELECT COUNT(*) FROM main.chunks WHERE file_id = ?1",
3885                    [other_file_id],
3886                    |row| { row.get::<_, i64>(0) }
3887                )
3888                .unwrap(),
3889            1
3890        );
3891        assert_eq!(
3892            db.storage
3893                .connection()
3894                .query_row(
3895                    "SELECT COUNT(*) FROM main.symbols WHERE file_id = ?1",
3896                    [other_file_id],
3897                    |row| { row.get::<_, i64>(0) }
3898                )
3899                .unwrap(),
3900            1
3901        );
3902        assert_eq!(
3903            db.storage
3904                .connection()
3905                .query_row(
3906                    "SELECT COUNT(*) FROM main.chunk_fts WHERE rowid = ?1",
3907                    [other_chunk_id],
3908                    |row| { row.get::<_, i64>(0) }
3909                )
3910                .unwrap(),
3911            1
3912        );
3913
3914        fs::remove_dir_all(root).unwrap();
3915    }
3916
3917    #[test]
3918    fn compatible_open_refuses_dirty_and_newer_schema() {
3919        let root = unique_temp_root();
3920        let _ = fs::remove_dir_all(&root);
3921        fs::create_dir_all(root.join(".rag-rat")).unwrap();
3922        let database = root.join(".rag-rat/index.sqlite");
3923        let conn = rusqlite::Connection::open(&database).unwrap();
3924        conn.execute_batch(
3925            "
3926            CREATE TABLE schema_version(
3927                id TEXT PRIMARY KEY,
3928                applied_at_ms INTEGER NOT NULL,
3929                checksum TEXT NOT NULL,
3930                description TEXT NOT NULL
3931            );
3932            INSERT INTO schema_version(id, applied_at_ms, checksum, description)
3933            VALUES ('__dirty__', 1, '', 'partial migration in progress');
3934            ",
3935        )
3936        .unwrap();
3937        drop(conn);
3938
3939        let dirty = IndexDatabase::migration_check(&database).unwrap();
3940        assert_eq!(dirty.state, schema::SchemaState::Dirty);
3941        let err = IndexDatabase::open(&database).unwrap_err().to_string();
3942        assert!(err.contains("dirty or partial"), "{err}");
3943
3944        let conn = rusqlite::Connection::open(&database).unwrap();
3945        conn.execute_batch(
3946            "
3947            DELETE FROM schema_version;
3948            INSERT INTO schema_version(id, applied_at_ms, checksum, description)
3949            VALUES ('999_future_schema', 1, 'sha256:future', 'future schema');
3950            ",
3951        )
3952        .unwrap();
3953        drop(conn);
3954        let newer = IndexDatabase::migration_check(&database).unwrap();
3955        assert_eq!(newer.state, schema::SchemaState::Newer);
3956        let err = IndexDatabase::open(&database).unwrap_err().to_string();
3957        assert!(err.contains("newer rag-rat"), "{err}");
3958
3959        fs::remove_dir_all(root).unwrap();
3960    }
3961
/// Discover mode must index files that appeared since the last run, drop files
/// that were deleted, and do no per-file work when nothing has drifted.
#[test]
fn discover_mode_indexes_new_files_and_removes_deleted_files() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    let stale_source = root.join("src/lib.rs");
    fs::write(&stale_source, "pub fn old_symbol() {}\n").unwrap();
    let config = source_config(root.clone(), Language::Rust);

    // Right after a rebuild there is nothing left to discover.
    let rebuilt = IndexDatabase::rebuild(&config).unwrap();
    let baseline = rebuilt.discovery_status(&config).unwrap();
    assert_eq!(baseline.unindexed_source_files, 0);

    // Create drift in both directions: one file appears, the indexed one goes away.
    fs::write(root.join("src/new.rs"), "pub fn new_symbol() {}\n").unwrap();
    fs::remove_file(&stale_source).unwrap();
    let drift = rebuilt.discovery_status(&config).unwrap();
    assert_eq!(drift.unindexed_source_files, 1);
    assert_eq!(drift.removed_indexed_files, 1);
    let hint = drift.warning.as_deref().unwrap();
    assert!(hint.contains("rag-rat index --discover"));

    // A discover run clears the drift and swaps the indexed symbols.
    let discovered = IndexDatabase::index_discover(&config).unwrap();
    let fresh = discovered.discovery_status(&config).unwrap();
    assert_eq!(fresh.unindexed_source_files, 0);
    assert_eq!(fresh.removed_indexed_files, 0);
    assert!(fresh.warning.is_none());
    let new_matches = discovered.symbols("new_symbol", Some(Language::Rust), 10).unwrap();
    assert_eq!(new_matches.len(), 1);
    let old_matches = discovered.symbols("old_symbol", Some(Language::Rust), 10).unwrap();
    assert!(old_matches.is_empty());

    // Running discover again with no drift must finish with zero files and
    // never emit a per-file progress event.
    let mut events = Vec::new();
    let idle = IndexDatabase::index_discover_with_progress(&config, |progress| {
        events.push(progress);
    })
    .unwrap();
    let finished_empty = matches!(events.last(), Some(IndexProgress::Finished { files: 0 }));
    assert!(finished_empty);
    assert!(
        events.iter().all(|event| !matches!(
            event,
            IndexProgress::PreparingFile { .. } | IndexProgress::IndexingFile { .. }
        )),
        "no-op discover should not prepare or index files: {events:?}"
    );
    assert_eq!(idle.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);

    fs::remove_dir_all(root).unwrap();
}
4004
/// Rebuilding a tree that contains a symlink pointing back at its own root
/// still succeeds and indexes the regular source file.
#[cfg(unix)]
#[test]
fn indexing_skips_symlink_loops() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "pub fn loop_safe_symbol() {}\n").unwrap();

    // `src/loop` points back at the root, forming a directory cycle.
    let cycle_link = root.join("src/loop");
    std::os::unix::fs::symlink(&root, cycle_link).unwrap();

    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let matches = db.symbols("loop_safe_symbol", Some(Language::Rust), 10).unwrap();
    assert_eq!(matches.len(), 1);

    fs::remove_dir_all(root).unwrap();
}
4021
/// An uncommitted edit to a tracked file is stored as a worktree-scoped row
/// next to the commit-scoped row, and search returns the edited content only.
#[test]
fn dirty_git_files_are_indexed_as_worktree_overlay() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let docs = root.join("docs");
    fs::create_dir_all(&docs).unwrap();
    let tracked_doc = docs.join("search.md");
    fs::write(&tracked_doc, "# Title\nbase token\n").unwrap();

    // Commit the base version; the author identity is supplied inline via `-c`.
    let commit_args = [
        "-c",
        "user.name=Rag Rat Test",
        "-c",
        "user.email=rag-rat@example.invalid",
        "commit",
        "-m",
        "initial",
    ];
    run_git(&root, &["init"]);
    run_git(&root, &["add", "."]);
    run_git(&root, &commit_args);

    let config = markdown_config_for_root(root.clone());
    let committed = IndexDatabase::rebuild(&config).unwrap();
    assert_eq!(committed.search("base", 10, false).unwrap().len(), 1);

    // Dirty the worktree without committing, then index only what changed.
    fs::write(&tracked_doc, "# Title\noverlay token\n").unwrap();
    let overlaid = IndexDatabase::index_changed(&config).unwrap();

    // Each row for the file is reduced to (has commit sha, has worktree id).
    let scope_sql = "
                SELECT commit_sha != '', worktree_id != ''
                FROM main.files
                WHERE path = 'docs/search.md'
                ORDER BY commit_sha != '' DESC, worktree_id != '' DESC
                ";
    let scopes = overlaid
        .storage
        .connection()
        .prepare(scope_sql)
        .unwrap()
        .query_map([], |row| {
            let has_commit = row.get::<_, bool>(0)?;
            let has_worktree = row.get::<_, bool>(1)?;
            Ok((has_commit, has_worktree))
        })
        .unwrap()
        .collect::<Result<Vec<_>, _>>()
        .unwrap();

    assert_eq!(scopes, vec![(true, false), (false, true)]);
    assert!(overlaid.search("base", 10, false).unwrap().is_empty());
    let overlay_hits = overlaid.search("overlay", 10, false).unwrap();
    assert_eq!(overlay_hits.len(), 1);
    assert!(overlay_hits[0].summary.contains("overlay token"));

    fs::remove_dir_all(root).unwrap();
}
4075
/// After a rebuild the status reports a non-empty content revision that the
/// FTS index and `index_meta` agree on, git history as unavailable, and
/// local-AI counters whose eligible + skipped totals match the chunk count.
#[test]
fn rebuild_populates_revision_metadata_and_fresh_fts_state() {
    let (root, config) = markdown_config("alpha token");
    let db = IndexDatabase::rebuild(&config).unwrap();
    let status = db.status(&config.database).unwrap();
    let revision = status.content_revision.as_str();

    // Revision bookkeeping: status, FTS source and stored meta must all agree.
    assert!(!status.content_revision.is_empty());
    assert_eq!(status.fts_source_revision.as_deref(), Some(revision));
    assert_eq!(db.meta("content_revision").unwrap().as_deref(), Some(revision));
    assert!(!status.fts_dirty);
    assert!(status.fts_fresh);

    // No git history is reported for this fixture.
    let history = &status.git_history;
    assert!(!history.available);
    assert_eq!(history.commit_count, 0);

    // Local-AI status: no embedding model yet, fastembed metadata is populated.
    let local_ai = &status.local_ai;
    let fastembed = &local_ai.fastembed;
    let artifacts = &local_ai.artifacts;
    assert_eq!(local_ai.embedding.state, "MissingModel");
    assert_eq!(fastembed.backend, "fastembed");
    assert_eq!(fastembed.model, ai::FASTEMBED_DISPLAY_MODEL);
    assert_eq!(fastembed.dim, ai::FASTEMBED_EMBEDDING_DIM);
    assert!(!fastembed.cache.is_empty());
    assert_eq!(fastembed.build_feature_enabled, cfg!(feature = "fastembed"));

    // Every chunk is accounted for as either eligible or skipped.
    assert_eq!(artifacts.total_chunks, 1);
    assert_eq!(artifacts.eligible_chunks + artifacts.skipped_chunks, artifacts.total_chunks);
    assert_eq!(
        fastembed.eligible_embeddings + fastembed.skipped_embeddings,
        artifacts.total_chunks
    );
    assert_eq!(indexed_revision_count(&db), 1);
    assert_eq!(chunk_source_revision_count(&db), 1);

    fs::remove_dir_all(root).unwrap();
}
4113
/// Without the `fastembed` build feature, installing the fastembed model fails
/// with the missing-feature message and the status tells the user what to run.
#[cfg(not(feature = "fastembed"))]
#[test]
fn fastembed_missing_feature_reports_rebuild_command() {
    let (root, config) = markdown_config("alpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    let install_error = db.install_model(ai::FASTEMBED_MODEL_ID).unwrap_err();
    let rendered = install_error.to_string();
    assert!(rendered.contains(ai::FASTEMBED_MISSING_FEATURE_MESSAGE));

    let status = db.local_ai_status().unwrap();
    let fastembed = &status.fastembed;
    assert!(!fastembed.build_feature_enabled);
    assert_eq!(fastembed.status, "MissingRuntime");
    assert_eq!(fastembed.message.as_deref(), Some(ai::FASTEMBED_MISSING_FEATURE_MESSAGE));
    assert_eq!(fastembed.next.as_deref(), Some("cargo install rag-rat"));

    fs::remove_dir_all(root).unwrap();
}
4134
/// Walks the embedding lifecycle for the hash model:
///
/// 1. before `install_model`, reconcile reports `Blocked` and writes nothing,
///    while lexical search keeps working;
/// 2. after install, reconcile writes the missing embedding and a second run
///    is a no-op;
/// 3. with the FTS table emptied, search still returns the chunk;
/// 4. an embedding whose `source_text_hash` no longer matches is planned as
///    stale and refreshed by the next reconcile.
#[test]
fn reconcile_requires_explicit_model_install_and_ignores_stale_artifacts() {
    let (root, config) = markdown_config(
        "alpha token\nsecond line with enough detail for the semantic embedding policy to keep this chunk\nthird line with runtime context\n",
    );
    let db = IndexDatabase::rebuild(&config).unwrap();
    let chunk_id = first_chunk_id(&db);

    // Phase 1: the model is listed but not installed.
    let models = db.list_models().unwrap();
    let embedding = models.iter().find(|model| model.model_id == ai::HASH_MODEL_ID).unwrap();
    assert!(!embedding.installed);
    assert_eq!(embedding.status, "MissingModel");

    let hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(hits.len(), 1);
    assert!(hits[0].summary.contains("alpha token"));

    let blocked = db.reconcile(Some(1), Some(8)).unwrap();
    assert_eq!(blocked.processed_chunks, 0);
    assert_eq!(blocked.embeddings_written, 0);
    assert_eq!(blocked.blocked_chunks, 0);
    assert_eq!(blocked.model_id, ai::HASH_MODEL_ID);
    assert_eq!(blocked.batch_size, 8);
    assert_eq!(blocked.status, "Blocked");

    let status = db.local_ai_status().unwrap();
    assert_eq!(status.embedding.state, "MissingModel");
    assert_eq!(status.embedding.blocked_artifacts, 0);

    // Phase 2: explicit install unlocks reconcile.
    db.install_model(ai::HASH_MODEL_ID).unwrap();
    let plan = db.reconcile_plan().unwrap();
    assert_eq!(plan.embeddings.missing, 1);
    assert_eq!(plan.embeddings.current, 0);
    let current = db.reconcile(Some(1), Some(8)).unwrap();
    assert_eq!(current.embeddings_written, 1);
    assert_eq!(current.model_id, ai::HASH_MODEL_ID);
    assert_eq!(current.model_version, "hash-v1");
    assert_eq!(current.embedding_dim, ai::HASH_EMBEDDING_DIM);
    assert_eq!(current.status, "Current");
    assert_eq!(current.work_reasons.get("Missing"), Some(&1));
    let noop = db.reconcile(None, Some(8)).unwrap();
    assert_eq!(noop.processed_chunks, 0);
    assert_eq!(noop.embeddings_written, 0);
    let status = db.local_ai_status().unwrap();
    assert_eq!(status.embedding.state, "Ready");
    assert_eq!(status.embedding.current_artifacts, 1);
    let embedding_bytes: i64 = db
        .storage
        .connection()
        .query_row(
            "SELECT length(vector_blob) FROM chunk_embeddings WHERE chunk_id = ?1 AND status = 'Current'",
            [chunk_id],
            |row| row.get(0),
        )
        .unwrap();
    // The blob is expected to be 4 bytes per dimension (presumably f32 values;
    // confirm against the writer). The conversion is checked rather than an
    // `as` cast, matching how the fastembed dimension is converted in the
    // sibling policy-skip test.
    let expected_bytes = i64::try_from(ai::HASH_EMBEDDING_DIM * 4).unwrap();
    assert_eq!(embedding_bytes, expected_bytes);

    let hits = db.search("alpha", 10, false).unwrap();
    assert!(hits[0].summary.contains("alpha token"));

    // Phase 3: with the FTS rows gone, the chunk must still be found.
    db.storage.connection().execute("DELETE FROM chunk_fts", []).unwrap();
    let vector_hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(vector_hits.len(), 1);
    assert_eq!(vector_hits[0].chunk_id, chunk_id);

    // Phase 4: corrupt the stored source hash so the embedding looks stale.
    db.storage
        .connection()
        .execute(
            "UPDATE chunk_embeddings SET source_text_hash = 'old-hash' WHERE chunk_id = ?1",
            [chunk_id],
        )
        .unwrap();
    let plan = db.reconcile_plan().unwrap();
    assert_eq!(plan.embeddings.current, 0);
    assert_eq!(plan.embeddings.stale, 1);
    let refreshed = db.reconcile(None, Some(8)).unwrap();
    assert_eq!(refreshed.processed_chunks, 1);
    assert_eq!(refreshed.work_reasons.get("SourceChanged"), Some(&1));
    assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 1);
    let stale_embedding_hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(stale_embedding_hits.len(), 1);

    fs::remove_dir_all(root).unwrap();
}
4219
/// When a fastembed model snapshot already exists in the cache directory,
/// recovery marks the model installed, `Ready`, and active.
#[cfg(feature = "fastembed")]
#[test]
fn cached_fastembed_model_recovers_ready_state() {
    let (root, config) = markdown_config("alpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Fake cache layout: `refs/main` holds the revision that names the
    // `snapshots/<revision>` directory.
    let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
    let cache_dir = root.join("models");
    let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
    let refs_dir = repo.join("refs");
    fs::create_dir_all(&refs_dir).unwrap();
    fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
    fs::write(refs_dir.join("main"), revision).unwrap();

    ai::recover_cached_fastembed_model_at(db.storage.connection(), &cache_dir).unwrap();

    let models = db.list_models().unwrap();
    let recovered = models
        .iter()
        .find(|model| model.model_id == ai::FASTEMBED_MODEL_ID)
        .unwrap();
    assert!(recovered.installed);
    assert_eq!(recovered.status, "Ready");

    let status = db.local_ai_status().unwrap();
    assert_eq!(status.fastembed.status, "Ready");
    assert!(status.fastembed.active);

    fs::remove_dir_all(root).unwrap();
}
4245
/// Migrating with a fastembed cache directory supplied restores a model row
/// that was reset to `MissingModel`, so the reopened database reports it as
/// `Ready` and active.
#[cfg(feature = "fastembed")]
#[test]
fn compatible_migrate_recovers_cached_fastembed_model() {
    let (root, config) = markdown_config("alpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Fake cache layout: `refs/main` holds the revision that names the
    // `snapshots/<revision>` directory.
    let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
    let cache_dir = root.join("models");
    let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
    let refs_dir = repo.join("refs");
    fs::create_dir_all(&refs_dir).unwrap();
    fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
    fs::write(refs_dir.join("main"), revision).unwrap();

    // Force the model row back to the not-installed state before migrating.
    let reset_sql = "UPDATE ai_models
                 SET installed = 0, status = 'MissingModel', installed_at_ms = NULL
                 WHERE model_id = ?1";
    db.storage.connection().execute(reset_sql, [ai::FASTEMBED_MODEL_ID]).unwrap();

    IndexDatabase::migrate_with_fastembed_cache(&config.database, Some(&cache_dir)).unwrap();

    let reopened = IndexDatabase::open(&config.database).unwrap();
    let status = reopened.local_ai_status().unwrap();
    assert_eq!(status.fastembed.status, "Ready");
    assert!(status.fastembed.active);

    fs::remove_dir_all(root).unwrap();
}
4276
/// With no chunk limit, one reconcile call embeds every chunk and the next
/// call finds nothing left to process.
#[test]
fn reconcile_without_limit_processes_all_chunks() {
    // Two headed sections; the test expects them to yield two embedded chunks.
    let fixture = "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n";
    let (root, config) = markdown_config(fixture);
    let db = IndexDatabase::rebuild(&config).unwrap();
    db.install_model(ai::HASH_MODEL_ID).unwrap();

    let first_pass = db.reconcile(None, Some(2)).unwrap();

    assert_eq!(first_pass.processed_chunks, 2);
    assert_eq!(first_pass.embeddings_written, 2);
    assert_eq!(first_pass.batch_size, 2);
    let stored = db.current_embedding_count(ai::HASH_MODEL_ID).unwrap();
    assert_eq!(stored, 2);

    let second_pass = db.reconcile(None, Some(2)).unwrap();
    assert_eq!(second_pass.processed_chunks, 0);

    fs::remove_dir_all(root).unwrap();
}
4296
/// Chunks from a C source file must not be skipped as an unsupported language:
/// the plan lists them as missing and reconcile writes embeddings for them.
#[test]
fn reconcile_treats_c_chunks_as_embedding_eligible() {
    let c_source = r#"
static int read_sensor_value(int baseline)
{
    int adjusted = baseline + 42;
    return adjusted;
}

int main(void)
{
    int sample = read_sensor_value(7);
    return sample == 49 ? 0 : 1;
}
"#;
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/main.c"), c_source).unwrap();

    let config = source_config(root.clone(), Language::C);
    let db = IndexDatabase::rebuild(&config).unwrap();
    db.install_model(ai::HASH_MODEL_ID).unwrap();

    let plan = db.reconcile_plan().unwrap();
    let embeddings = &plan.embeddings;

    assert_eq!(embeddings.skipped_by_policy.get("SkipLanguageUnsupported"), None);
    assert!(embeddings.missing > 0, "plan: {:?}", plan.embeddings);

    let report = db.reconcile(None, Some(8)).unwrap();
    assert!(report.embeddings_written > 0, "report: {report:?}");

    fs::remove_dir_all(root).unwrap();
}
4333
/// A chunk that is too small is counted under `SkipTooSmall` in both the plan
/// and the reconcile report, and no embedding is stored for it.
#[test]
fn reconcile_policy_skips_tiny_chunks_before_embedding() {
    let (root, config) = markdown_config("tiny\n");
    let db = IndexDatabase::rebuild(&config).unwrap();
    db.install_model(ai::HASH_MODEL_ID).unwrap();

    // Planning already classifies the chunk as skipped rather than missing.
    let plan = db.reconcile_plan().unwrap();
    let planned = &plan.embeddings;
    assert_eq!(planned.missing, 0);
    assert_eq!(planned.skipped_by_policy.get("SkipTooSmall"), Some(&1));

    // Running reconcile reports the same skip and writes nothing.
    let report = db.reconcile(None, Some(8)).unwrap();
    assert_eq!(report.embeddings_written, 0);
    assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
    let stored = db.current_embedding_count(ai::HASH_MODEL_ID).unwrap();
    assert_eq!(stored, 0);

    fs::remove_dir_all(root).unwrap();
}
4351
/// With the fastembed model marked installed and selected as the active
/// embedding model directly in the database, the reconcile plan targets that
/// model and still reports the tiny chunk under `SkipTooSmall`.
#[test]
fn reconcile_plan_reports_policy_skips_for_fastembed_model() {
    let (root, config) = markdown_config("tiny\n");
    let db = IndexDatabase::rebuild(&config).unwrap();
    let conn = db.storage.connection();

    // Mark the model row as installed and ready without going through install.
    let mark_ready_sql = "UPDATE ai_models
                 SET installed = 1, disabled = 0, status = 'Ready', embedding_dim = ?2
                 WHERE model_id = ?1";
    let dim = i64::try_from(ai::FASTEMBED_EMBEDDING_DIM).unwrap();
    conn.execute(mark_ready_sql, params![ai::FASTEMBED_MODEL_ID, dim]).unwrap();

    // Select it as the active embedding model.
    let activate_sql = "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
                 ON CONFLICT(key) DO UPDATE SET value = excluded.value";
    conn.execute(activate_sql, [ai::FASTEMBED_MODEL_ID]).unwrap();

    let plan = db.reconcile_plan().unwrap();
    let planned = &plan.embeddings;

    assert_eq!(planned.model_id, ai::FASTEMBED_MODEL_ID);
    assert_eq!(planned.missing, 0);
    assert_eq!(planned.skipped_by_policy.get("SkipTooSmall"), Some(&1));

    fs::remove_dir_all(root).unwrap();
}
4385
/// Without the `fastembed` feature, selecting the fastembed model as active
/// makes reconcile report `Blocked`, yet policy skips are still counted.
#[cfg(not(feature = "fastembed"))]
#[test]
fn blocked_fastembed_reconcile_still_reports_policy_skips() {
    let (root, config) = markdown_config("tiny\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Select the fastembed model as active directly in `index_meta`.
    let activate_sql = "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
                 ON CONFLICT(key) DO UPDATE SET value = excluded.value";
    db.storage.connection().execute(activate_sql, [ai::FASTEMBED_MODEL_ID]).unwrap();

    let report = db.reconcile(None, Some(8)).unwrap();

    assert_eq!(report.status, "Blocked");
    let too_small = report.skipped_by_policy.get("SkipTooSmall");
    assert_eq!(too_small, Some(&1));

    fs::remove_dir_all(root).unwrap();
}
4407
/// `search_explain` exposes per-signal score components that add up to the
/// hit's score and stay within the asserted per-signal caps; plain `search`
/// omits the breakdown.
#[test]
fn search_explain_reports_weighted_score_components() {
    let (root, config) = markdown_config(
        "alpha runtime shutdown\nsecond line with enough detail for embedding eligibility and semantic vector scoring\nthird line\n",
    );
    let db = IndexDatabase::rebuild(&config).unwrap();
    db.install_model(ai::HASH_MODEL_ID).unwrap();
    db.reconcile(None, Some(8)).unwrap();

    let explained = db.search_explain("runtime shutdown", 10, false).unwrap();

    assert_eq!(explained.len(), 1);
    let top = &explained[0];
    let components = top.score_components.as_ref().unwrap();

    // The total must equal the sum of its parts (within float tolerance).
    let component_sum = components.bm25
        + components.vector
        + components.symbol
        + components.graph
        + components.git
        + components.github;
    assert!((top.score - component_sum).abs() < 0.000_001);

    // Both lexical and vector signals contribute, so no vector note is set.
    assert!(components.bm25 > 0.0);
    assert!(components.vector > 0.0);
    assert!(components.vector_note.is_none());

    // Each signal stays at or below its cap.
    let capped = [
        (components.bm25, 0.45),
        (components.vector, 0.35),
        (components.symbol, 0.10),
        (components.graph, 0.05),
        (components.git, 0.03),
        (components.github, 0.02),
    ];
    for (value, cap) in capped {
        assert!(value <= cap);
    }

    let plain = db.search("runtime shutdown", 10, false).unwrap();
    assert!(plain[0].score_components.is_none());

    fs::remove_dir_all(root).unwrap();
}
4441
/// When no embedding model has been installed, the explained hit carries a
/// zero vector component and a note saying why vector search was unavailable.
#[test]
fn search_explain_labels_missing_vector_runtime() {
    let fixture = "alpha runtime shutdown\nsecond line with enough detail for lexical search without embeddings\nthird line\n";
    let (root, config) = markdown_config(fixture);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let explained = db.search_explain("runtime shutdown", 10, false).unwrap();

    assert_eq!(explained.len(), 1);
    let components = explained[0].score_components.as_ref().unwrap();
    assert!(components.bm25 > 0.0);
    assert_eq!(components.vector, 0.0);
    let note = components.vector_note.as_deref();
    assert_eq!(note, Some("vector search unavailable: no current embedding model"));

    fs::remove_dir_all(root).unwrap();
}
4462
/// End-to-end check of git-history indexing on a two-commit repository:
/// status counters, commit search, per-path and per-symbol history, impact
/// surface rows, query-to-commit evidence, and chunk blame.
#[test]
fn git_history_indexes_commits_paths_queries_and_blame() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    for dir in ["docs", "src"] {
        fs::create_dir_all(root.join(dir)).unwrap();
    }
    run_git(&root, &["init"]);
    for (key, value) in [("user.name", "Rag Rat"), ("user.email", "rag@example.com")] {
        run_git(&root, &["config", key, value]);
    }

    // Stage everything and commit it with the given subject.
    let commit_all = |subject: &str| {
        run_git(&root, &["add", "."]);
        run_git(&root, &["commit", "-m", subject]);
    };

    // Commit 1 adds both files; commit 2 rewrites only the markdown file.
    fs::write(root.join("docs/search.md"), "# Title\nalpha token\n").unwrap();
    fs::write(root.join("src/lib.rs"), "pub fn tracked_symbol() {}\n").unwrap();
    commit_all("Add alpha docs");

    fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
    commit_all("Refresh beta docs");

    let markdown_target = ResolvedTarget {
        name: "markdown".to_string(),
        language: Language::Markdown,
        directories: vec![PathBuf::from("docs")],
        include: vec!["**/*.md".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Docs,
    };
    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.rs".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![markdown_target, rust_target],
        local_ai: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Status: history is available, indexed up to HEAD, with 2 commits and
    // 3 file changes recorded.
    let status = db.status(&config.database).unwrap();
    let history = &status.git_history;
    assert!(history.available);
    assert!(history.head.is_some());
    assert_eq!(history.indexed_head, history.head);
    assert_eq!(history.commit_count, 2);
    assert_eq!(history.file_change_count, 3);

    // Commit search matches on the commit subject.
    let commit_hits = db.commit_search("beta", 10).unwrap();
    assert_eq!(commit_hits.len(), 1);
    let beta_hit = &commit_hits[0];
    assert_eq!(beta_hit.subject, "Refresh beta docs");
    assert_eq!(beta_hit.evidence_kind, "historical");
    assert!(beta_hit.score > 0.0);

    // Path history: both commits touched the markdown file.
    let path_history = db.git_history_for_path("docs/search.md", 10).unwrap();
    assert_eq!(path_history.len(), 2);
    assert!(path_history.iter().all(|item| item.evidence_kind == "historical"));

    // Symbol history: one entry, for the file defining the symbol.
    let symbol_history =
        db.git_history_for_symbol("tracked_symbol", Some(Language::Rust), 10).unwrap();
    assert_eq!(symbol_history.len(), 1);
    let symbol_entry = &symbol_history[0];
    assert_eq!(symbol_entry.path, "src/lib.rs");
    assert_eq!(symbol_entry.evidence_kind, "historical");

    // Impact surface mixes structural and historical evidence.
    let impact = db.impact_surface("tracked_symbol", 10).unwrap();
    let has_definition = impact.iter().any(|item| {
        item.category == "Direct structural impact" && item.reason == "exact_symbol_definition"
    });
    assert!(has_definition);
    let has_commit_evidence = impact.iter().any(|item| {
        item.category == "Historical/papertrail evidence"
            && item.reason == "git_commit_touched_file"
    });
    assert!(has_commit_evidence);

    // Query-to-commit lookup reports why each commit matched.
    let query_commits = db.commits_touching_query("beta", 10).unwrap();
    let beta_commit =
        query_commits.iter().find(|hit| hit.subject == "Refresh beta docs").unwrap();
    assert!(beta_commit.evidence.iter().any(|value| value == "commit_message"));
    assert!(beta_commit.evidence.iter().any(|value| value == "file_change"));
    assert_eq!(beta_commit.evidence_kind, "historical");

    // Blame covers the two lines of the current markdown content, and a second
    // call returns a summary for the same source hash.
    let chunk_id = first_chunk_id(&db);
    let blame = db.git_blame_chunk(chunk_id).unwrap().unwrap();
    let expected_hash = hex_sha256("# Title\nbeta token\n".as_bytes());
    assert_eq!(blame.source_text_hash, expected_hash);
    assert_eq!(blame.line_count, 2);
    assert_eq!(blame.commit_counts.values().sum::<i64>(), 2);
    assert!(blame.dominant_commit_lines >= 1);
    assert!(blame.dominant_commit.is_some());
    assert_eq!(blame.evidence_kind, "historical");
    let cached = db.git_blame_chunk(chunk_id).unwrap().unwrap();
    assert_eq!(cached.source_text_hash, blame.source_text_hash);

    fs::remove_dir_all(root).unwrap();
}
4557
/// Indexing a small Rust file records call, trait-implementation and import
/// edges, and `find_callers` returns the calling function.
#[test]
fn indexes_rust_graph_edges_from_tree_sitter() {
    let rust_source = r#"
use crate::worker::Worker;
mod worker;

trait Service {
    fn serve(&self);
}

struct Worker;

impl Service for Worker {
    fn serve(&self) {
        helper();
    }
}

fn helper() {}

fn caller() {
    helper();
    Worker.serve();
}
"#;
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), rust_source).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // One edge of each kind the fixture exercises.
    assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
    assert_edge(&db, "Worker", "Service", "implements", "Syntactic");
    assert_edge(&db, "src/lib.rs", "worker", "imports", "Syntactic");

    let callers = db.find_callers("helper", 10).unwrap();
    let caller_edge_found = callers.iter().any(|edge| {
        let from_caller =
            edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"));
        from_caller && edge.edge_kind == "calls_name"
    });
    assert!(caller_edge_found, "helper callers: {callers:?}");

    fs::remove_dir_all(root).unwrap();
}
4607
/// The FFI surface distinguishes directly exported functions, exported impl
/// blocks, and the members of those impls, and ignores items that only mention
/// `uniffi::export` in a comment.
#[test]
fn ffi_surface_labels_exported_impl_members_separately() {
    let rust_source = r#"
pub struct PhraseRepo;

#[uniffi::export]
impl PhraseRepo {
    pub fn children(&self) {}
    pub fn journal(&self) {}
}

#[cfg_attr(not(target_arch = "wasm32"), uniffi::export(async_runtime = "tokio"))]
impl Runtime {
    pub fn route_search_query(&self) {}
}

pub struct Runtime;

/// Not #[uniffi::export]: this is an internal helper.
pub fn internal_helper() {}

#[cfg_attr(target_arch = "wasm32", ::uniffi::export)]
pub fn exported_fn() {}
"#;
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), rust_source).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let surface = db.ffi_surface(20).unwrap();

    // True when some row has `reason` and a symbol ending in `suffix`.
    let labeled = |reason: &str, suffix: &str| {
        surface.iter().any(|item| {
            item.reason == reason
                && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with(suffix))
        })
    };
    // True when any row, whatever its reason, has a symbol ending in `suffix`.
    let mentions = |suffix: &str| {
        surface
            .iter()
            .any(|item| item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with(suffix)))
    };

    assert!(
        labeled("rust_uniffi_export", "exported_fn"),
        "direct export should remain direct: {surface:?}"
    );
    assert!(
        surface.iter().any(|item| item.reason == "rust_uniffi_exported_impl"),
        "exported impl/type surface should be explicit: {surface:?}"
    );
    assert!(
        labeled("rust_uniffi_impl_member", "route_search_query"),
        "cfg_attr exported impl member should be labeled separately: {surface:?}"
    );
    assert!(
        labeled("rust_uniffi_impl_member", "children"),
        "impl member should be labeled separately: {surface:?}"
    );
    assert!(
        !labeled("rust_uniffi_export", "children") && !labeled("rust_uniffi_export", "journal"),
        "impl members must not be reported as direct exports: {surface:?}"
    );
    assert!(
        !mentions("internal_helper"),
        "comment-only UniFFI mentions must not create FFI surface rows: {surface:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
4689
/// Search hits carry a compact graph summary (top callers/callees only), while
/// `read_chunk` returns the full graph including callees and notes.
#[test]
fn search_and_read_chunk_attach_bounded_graph_evidence() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    // `caller` invokes `helper` on line 4 of the file.
    let rust_source = "pub fn helper() {}\n\npub fn caller() {\n    helper();\n}\n";
    fs::write(root.join("src/lib.rs"), rust_source).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let hits = db.search("helper caller", 10, false).unwrap();
    // First hit whose symbol path ends with `suffix`, if any.
    let hit_ending_with = |suffix: &str| {
        hits.iter()
            .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with(suffix)))
    };

    // The callee's hit lists its single caller with the call site.
    let helper_hit = hit_ending_with("helper").expect("helper search hit");
    let helper_graph = helper_hit.graph.as_ref().expect("helper graph evidence");
    assert_eq!(helper_graph.caller_count, 1);
    let caller_listed = helper_graph.top_callers.iter().any(|caller| {
        caller.symbol_path.ends_with("caller")
            && caller.callsite.line == 4
            && caller.callsite.span == [4, 4]
            && caller.confidence == "syntactic"
    });
    assert!(caller_listed);
    assert!(helper_graph.callers.is_empty(), "search keeps graph compact");

    // The caller's hit lists the callee with the same call site.
    let caller_hit = hit_ending_with("caller").expect("caller search hit");
    let caller_graph = caller_hit.graph.as_ref().expect("caller graph evidence");
    let callee_listed = caller_graph.top_callees.iter().any(|callee| {
        callee.target == "helper"
            && callee.callsite.line == 4
            && callee.callsite.span == [4, 4]
            && callee.confidence == "syntactic"
    });
    assert!(callee_listed);

    // Reading the chunk returns the full graph rather than the compact one.
    let chunk = db.read_chunk(caller_hit.chunk_id).unwrap().expect("caller chunk");
    let full_graph = chunk.graph.as_ref().expect("full read_chunk graph");
    assert!(full_graph.symbol.as_ref().is_some_and(|symbol| symbol.name == "caller"));
    let full_callee_listed = full_graph
        .callees
        .iter()
        .any(|callee| callee.target == "helper" && callee.callsite.line == 4);
    assert!(full_callee_listed);
    assert!(full_graph.notes.iter().any(|note| note.contains("tree-sitter/syntactic")));

    fs::remove_dir_all(root).unwrap();
}
4743
4744    #[test]
4745    fn graph_exact_mode_requires_verified_symbol_identity() {
4746        let root = unique_temp_root();
4747        let _ = fs::remove_dir_all(&root);
4748        fs::create_dir_all(root.join("src")).unwrap();
4749        fs::write(
4750            root.join("src/lib.rs"),
4751            "pub fn helper() {}\n\npub fn caller() {\n    helper();\n}\n",
4752        )
4753        .unwrap();
4754        let config = source_config(root.clone(), Language::Rust);
4755        let db = IndexDatabase::rebuild(&config).unwrap();
4756        let helper = db.symbols("helper", Some(Language::Rust), 10).unwrap().remove(0);
4757        let caller = db.symbols("caller", Some(Language::Rust), 10).unwrap().remove(0);
4758
4759        let bare_exact = db
4760            .find_callers_with_options(
4761                "helper",
4762                10,
4763                &crate::query::graph::GraphTraversalOptions {
4764                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4765                    ..Default::default()
4766                },
4767            )
4768            .unwrap();
4769        assert!(bare_exact.is_empty(), "bare exact lookup should not fall back: {bare_exact:?}");
4770
4771        let exact_callers = db
4772            .find_callers_with_options(
4773                "helper",
4774                10,
4775                &crate::query::graph::GraphTraversalOptions {
4776                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4777                    symbol_id: Some(helper.symbol_id),
4778                    ..Default::default()
4779                },
4780            )
4781            .unwrap();
4782        assert!(
4783            exact_callers.iter().any(|edge| {
4784                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
4785                    && edge.verified_target_symbol
4786            }),
4787            "exact callers: {exact_callers:?}"
4788        );
4789        assert!(exact_callers.iter().all(|edge| edge.verified_target_symbol));
4790
4791        let exact_callees = db
4792            .trace_callees_with_options(
4793                "caller",
4794                10,
4795                &crate::query::graph::GraphTraversalOptions {
4796                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4797                    symbol_id: Some(caller.symbol_id),
4798                    ..Default::default()
4799                },
4800            )
4801            .unwrap();
4802        assert!(
4803            exact_callees.iter().any(|edge| {
4804                edge.target.as_deref() == Some("helper") && edge.verified_target_symbol
4805            }),
4806            "exact callees: {exact_callees:?}"
4807        );
4808        assert!(exact_callees.iter().all(|edge| edge.verified_target_symbol));
4809
4810        fs::remove_dir_all(root).unwrap();
4811    }
4812
4813    #[test]
4814    fn symbol_lookup_ranks_type_definitions_before_impl_blocks() {
4815        let root = unique_temp_root();
4816        let _ = fs::remove_dir_all(&root);
4817        fs::create_dir_all(root.join("src")).unwrap();
4818        fs::write(
4819            root.join("src/lib.rs"),
4820            r#"
4821impl Database {
4822    pub fn open() -> Self {
4823        Database
4824    }
4825}
4826
4827pub struct Database;
4828"#,
4829        )
4830        .unwrap();
4831        let config = source_config(root.clone(), Language::Rust);
4832        let db = IndexDatabase::rebuild(&config).unwrap();
4833        let hits = db.symbols("Database", Some(Language::Rust), 10).unwrap();
4834        assert!(hits.len() >= 2, "fixture should expose both impl and struct symbols: {hits:?}");
4835        assert_eq!(hits[0].kind, "struct", "Database lookup should prefer type definition");
4836        assert!(
4837            hits.iter().any(|hit| hit.kind == "impl"),
4838            "impl Database should still be available after the struct: {hits:?}"
4839        );
4840
4841        fs::remove_dir_all(root).unwrap();
4842    }
4843
4844    #[test]
4845    fn logical_symbol_exact_mode_covers_duplicate_rust_variants() {
4846        let root = unique_temp_root();
4847        let _ = fs::remove_dir_all(&root);
4848        fs::create_dir_all(root.join("src")).unwrap();
4849        fs::write(
4850            root.join("src/lib.rs"),
4851            r#"
4852#[cfg(not(target_arch = "wasm32"))]
4853pub fn spawn_blocking() {}
4854
4855#[cfg(target_arch = "wasm32")]
4856pub fn spawn_blocking() {}
4857
4858pub fn caller() {
4859    spawn_blocking();
4860}
4861"#,
4862        )
4863        .unwrap();
4864        let config = source_config(root.clone(), Language::Rust);
4865        let db = IndexDatabase::rebuild(&config).unwrap();
4866        let lookup = db
4867            .symbol_candidates(&crate::query::symbol::SymbolSelector {
4868                logical_symbol_id: None,
4869                symbol_id: None,
4870                symbol_path: None,
4871                symbol: Some("spawn_blocking".to_string()),
4872                language: Some(Language::Rust),
4873                allow_ambiguous: true,
4874                limit: 10,
4875            })
4876            .unwrap();
4877        let logical_symbol_id = lookup.candidates[0].logical_symbol_id.expect("logical id");
4878        assert_eq!(lookup.candidates[0].logical_variant_count, Some(2));
4879        assert_eq!(lookup.candidates[0].logical_group_reason.as_deref(), Some("cfg_variant"));
4880
4881        let exact_variant_callers = db
4882            .find_callers_with_options(
4883                "spawn_blocking",
4884                10,
4885                &crate::query::graph::GraphTraversalOptions {
4886                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4887                    symbol_id: Some(lookup.candidates[1].symbol_id),
4888                    ..Default::default()
4889                },
4890            )
4891            .unwrap();
4892        assert!(
4893            exact_variant_callers.iter().any(|edge| {
4894                edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
4895                    && edge.target.as_deref() == Some("spawn_blocking")
4896                    && edge.verified_target_symbol
4897            }),
4898            "symbol_id exact should include its logical cfg group: {exact_variant_callers:?}"
4899        );
4900        assert!(exact_variant_callers.iter().all(|edge| edge.verified_target_symbol));
4901
4902        let exact_logical = db
4903            .graph_traversal_report(
4904                "find_callers",
4905                &lookup.candidates[0],
4906                true,
4907                10,
4908                &crate::query::graph::GraphTraversalOptions {
4909                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4910                    symbol_id: Some(lookup.candidates[0].symbol_id),
4911                    ..Default::default()
4912                },
4913            )
4914            .unwrap();
4915        assert_eq!(exact_logical.query.logical_symbol_id, Some(logical_symbol_id));
4916        assert_eq!(
4917            exact_logical.logical_symbol.as_ref().map(|symbol| symbol.variant_count),
4918            Some(2)
4919        );
4920        assert_eq!(exact_logical.variants.len(), 2);
4921        assert!(exact_logical.results.iter().all(|edge| edge.verified_target_symbol));
4922        assert!(
4923            exact_logical.results.iter().any(|edge| {
4924                edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
4925                    && edge.target.as_deref() == Some("spawn_blocking")
4926            }),
4927            "logical exact callers: {exact_logical:?}"
4928        );
4929
4930        fs::remove_dir_all(root).unwrap();
4931    }
4932
4933    #[test]
4934    fn indexes_real_world_rust_graph_patterns() {
4935        let root = fixture_temp_root("graph-realworld/rust");
4936        let config = source_config(root.clone(), Language::Rust);
4937        let db = IndexDatabase::rebuild(&config).unwrap();
4938
4939        assert_edge(&db, "src/lib.rs", "worker", "imports", "Syntactic");
4940        assert_edge(&db, "src/lib.rs", "Worker", "exports", "Syntactic");
4941        assert_edge(&db, "entry", "new", "calls_name", "NameOnly");
4942        assert_edge(&db, "entry", "Client", "references_type", "Syntactic");
4943        assert_edge(&db, "drive", "serve", "calls_name", "NameOnly");
4944        assert_edge(&db, "drive", "GenericRunner", "references_type", "Syntactic");
4945        assert_edge(&db, "Worker", "Service", "implements", "Syntactic");
4946        assert_edge(&db, "generic_call", "T", "references_type", "NameOnly");
4947        assert_edge(&db, "entry", "generated_call", "uses_macro", "NameOnly");
4948        let syntactic_callers = db.find_callers("serve", 10).unwrap();
4949        assert!(
4950            syntactic_callers.is_empty(),
4951            "syntactic serve callers should avoid receiver/name fallback: {syntactic_callers:?}"
4952        );
4953        let callers = db
4954            .find_callers_with_options(
4955                "serve",
4956                10,
4957                &crate::query::graph::GraphTraversalOptions {
4958                    resolution_mode: crate::query::graph::GraphResolutionMode::Fuzzy,
4959                    ..Default::default()
4960                },
4961            )
4962            .unwrap();
4963        assert!(
4964            callers.iter().any(|edge| {
4965                edge.edge_kind == "calls_name"
4966                    && edge.edge_confidence == edge.confidence
4967                    && edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("drive"))
4968            }),
4969            "serve callers: {callers:?}"
4970        );
4971
4972        fs::remove_dir_all(root).unwrap();
4973    }
4974
4975    #[test]
4976    fn indexes_typescript_graph_edges_from_tree_sitter() {
4977        let root = unique_temp_root();
4978        let _ = fs::remove_dir_all(&root);
4979        fs::create_dir_all(root.join("src")).unwrap();
4980        fs::write(
4981            root.join("src/helper.ts"),
4982            "export function helper() {}\nexport const Card = () => null;\n",
4983        )
4984        .unwrap();
4985        fs::write(
4986            root.join("src/App.tsx"),
4987            r#"
4988import { helper, Card } from "./helper";
4989
4990export function run() {
4991  helper();
4992  return <Card />;
4993}
4994
4995export const callRun = () => run();
4996"#,
4997        )
4998        .unwrap();
4999        let config = source_config(root.clone(), Language::TypeScript);
5000        let db = IndexDatabase::rebuild(&config).unwrap();
5001
5002        assert_edge(&db, "run", "helper", "calls_name", "Syntactic");
5003        assert_edge(&db, "run", "Card", "references_type", "Syntactic");
5004        assert_edge(&db, "src/App.tsx", "helper", "imports", "Syntactic");
5005        assert_edge(&db, "src/App.tsx", "run", "exports", "Syntactic");
5006        let callees = db.trace_callees("callRun", 10).unwrap();
5007        assert!(
5008            callees.iter().any(|edge| {
5009                edge.to_symbol.as_deref().is_some_and(|name| name.ends_with("run"))
5010                    && edge.confidence == "Syntactic"
5011            }),
5012            "callRun callees: {callees:?}"
5013        );
5014
5015        fs::remove_dir_all(root).unwrap();
5016    }
5017
5018    #[test]
5019    fn indexes_c_graph_edges_from_tree_sitter() {
5020        let root = unique_temp_root();
5021        let _ = fs::remove_dir_all(&root);
5022        fs::create_dir_all(root.join("src")).unwrap();
5023        fs::write(
5024            root.join("src/runtime.c"),
5025            r#"
5026typedef struct Runtime Runtime;
5027
5028struct Runtime {
5029  int state;
5030};
5031
5032int helper(Runtime *runtime) {
5033  return runtime->state;
5034}
5035
5036int runtime_open(Runtime *runtime) {
5037  return helper(runtime);
5038}
5039"#,
5040        )
5041        .unwrap();
5042        let config = source_config(root.clone(), Language::C);
5043        let db = IndexDatabase::rebuild(&config).unwrap();
5044
5045        assert_edge(&db, "runtime_open", "helper", "calls_name", "Syntactic");
5046
5047        fs::remove_dir_all(root).unwrap();
5048    }
5049
5050    #[test]
5051    fn indexes_c_file_scope_macro_regions_for_search() {
5052        let root = unique_temp_root();
5053        let _ = fs::remove_dir_all(&root);
5054        fs::create_dir_all(root.join("drivers/entropy")).unwrap();
5055        fs::write(
5056            root.join("drivers/entropy/entropy.c"),
5057            r#"
5058static int entropy_init(const struct device *dev)
5059{
5060    ARG_UNUSED(dev);
5061    return 0;
5062}
5063
5064/* Entropy driver APIs structure */
5065static DEVICE_API(entropy, entropy_cryptoacc_trng_api) = {
5066    .get_entropy = entropy_cryptoacc_trng_get_entropy,
5067};
5068
5069DEVICE_DT_INST_DEFINE(0, entropy_init, NULL, NULL, NULL,
5070                      PRE_KERNEL_1, CONFIG_ENTROPY_INIT_PRIORITY,
5071                      &entropy_cryptoacc_trng_api);
5072"#,
5073        )
5074        .unwrap();
5075        let config = Config {
5076            root: root.clone(),
5077            database: root.join(".rag-rat/index.sqlite"),
5078            targets: vec![ResolvedTarget {
5079                name: "c".to_string(),
5080                language: Language::C,
5081                directories: vec![PathBuf::from("drivers/entropy")],
5082                include: vec!["**/*.c".to_string()],
5083                exclude: Vec::new(),
5084                kind: TargetKind::Source,
5085            }],
5086            local_ai: Default::default(),
5087        };
5088        let db = IndexDatabase::rebuild(&config).unwrap();
5089
5090        let hits = db.search("DEVICE_API", 5, false).unwrap();
5091        assert!(
5092            hits.iter().any(|hit| {
5093                hit.path == "drivers/entropy/entropy.c" && hit.summary.contains("DEVICE_API")
5094            }),
5095            "DEVICE_API hits: {hits:?}"
5096        );
5097
5098        fs::remove_dir_all(root).unwrap();
5099    }
5100
5101    #[test]
5102    fn indexes_cpp_graph_edges_from_tree_sitter() {
5103        let root = unique_temp_root();
5104        let _ = fs::remove_dir_all(&root);
5105        fs::create_dir_all(root.join("src")).unwrap();
5106        fs::write(
5107            root.join("src/runtime.cpp"),
5108            r#"
5109namespace held {
5110class Runtime {
5111public:
5112  void open();
5113};
5114
5115void helper() {}
5116
5117void Runtime::open() {
5118  helper();
5119}
5120}
5121"#,
5122        )
5123        .unwrap();
5124        let config = source_config(root.clone(), Language::Cpp);
5125        let db = IndexDatabase::rebuild(&config).unwrap();
5126
5127        assert_edge(&db, "open", "helper", "calls_name", "Syntactic");
5128
5129        fs::remove_dir_all(root).unwrap();
5130    }
5131
5132    #[test]
5133    fn indexes_real_world_typescript_graph_patterns() {
5134        let root = fixture_temp_root("graph-realworld/typescript");
5135        let config = source_config(root.clone(), Language::TypeScript);
5136        let db = IndexDatabase::rebuild(&config).unwrap();
5137
5138        assert_edge(&db, "src/lib.tsx", "DefaultWidget", "imports", "Syntactic");
5139        assert_edge(&db, "src/lib.tsx", "WidgetNS", "imports", "NameOnly");
5140        assert_edge(&db, "src/lib.tsx", "WidgetProps", "imports", "Syntactic");
5141        assert_edge(&db, "src/lib.tsx", "ReExportedWidget", "exports", "NameOnly");
5142        assert_edge(&db, "useWidget", "useMemo", "calls_name", "NameOnly");
5143        assert_edge(&db, "useWidget", "DefaultWidget", "calls_name", "Syntactic");
5144        assert_edge(&db, "Shell", "renderWidget", "calls_name", "NameOnly");
5145        assert_edge(&db, "Shell", "WidgetNS", "references_type", "NameOnly");
5146        assert_edge(&db, "Shell", "DefaultWidget", "references_type", "Syntactic");
5147        assert_edge(&db, "DefaultWidget", "WidgetProps", "references_type", "Syntactic");
5148        let callees = db
5149            .trace_callees_with_options(
5150                "Shell",
5151                10,
5152                &crate::query::graph::GraphTraversalOptions {
5153                    include_references: true,
5154                    edge_kinds: None,
5155                    ..Default::default()
5156                },
5157            )
5158            .unwrap();
5159        assert!(
5160            callees.iter().any(|edge| {
5161                edge.edge_kind == "references_type"
5162                    && edge.edge_confidence == edge.confidence
5163                    && edge.to_symbol.as_deref().is_some_and(|name| name.ends_with("DefaultWidget"))
5164            }),
5165            "Shell callees: {callees:?}"
5166        );
5167
5168        fs::remove_dir_all(root).unwrap();
5169    }
5170
5171    #[test]
5172    fn rust_macro_edges_do_not_resolve_to_same_named_modules() {
5173        let root = unique_temp_root();
5174        let _ = fs::remove_dir_all(&root);
5175        fs::create_dir_all(root.join("src")).unwrap();
5176        fs::write(
5177            root.join("src/lib.rs"),
5178            r#"
5179mod format;
5180
5181fn execute_one() {
5182    let _value = format!("hello");
5183}
5184"#,
5185        )
5186        .unwrap();
5187        fs::write(root.join("src/format.rs"), "pub fn helper() {}\n").unwrap();
5188        let config = source_config(root.clone(), Language::Rust);
5189        let db = IndexDatabase::rebuild(&config).unwrap();
5190
5191        let edge = db
5192            .storage
5193            .connection()
5194            .query_row(
5195                "
5196                SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
5197                FROM edges
5198                WHERE edge_kind = 'uses_macro'
5199                  AND to_name = 'format'
5200                ",
5201                [],
5202                |row| {
5203                    Ok((
5204                        row.get::<_, String>(0)?,
5205                        row.get::<_, String>(1)?,
5206                        row.get::<_, Option<i64>>(2)?,
5207                        row.get::<_, String>(3)?,
5208                        row.get::<_, String>(4)?,
5209                        row.get::<_, Option<String>>(5)?,
5210                    ))
5211                },
5212            )
5213            .unwrap();
5214        assert_eq!(edge.0, "uses_macro");
5215        assert_eq!(edge.1, "format");
5216        assert_eq!(edge.2, None);
5217        assert_eq!(edge.3, "NameOnly");
5218        assert_eq!(edge.4, "unresolved");
5219        assert!(edge.5.as_deref().is_some_and(|value| value.contains("format!")));
5220
5221        fs::remove_dir_all(root).unwrap();
5222    }
5223
5224    #[test]
5225    fn opening_old_graph_policy_rebuilds_stale_macro_edges() {
5226        let root = unique_temp_root();
5227        let _ = fs::remove_dir_all(&root);
5228        fs::create_dir_all(root.join("src")).unwrap();
5229        fs::write(
5230            root.join("src/lib.rs"),
5231            r#"
5232mod format;
5233
5234fn execute_one() {
5235    let _value = format!("hello");
5236}
5237"#,
5238        )
5239        .unwrap();
5240        fs::write(root.join("src/format.rs"), "pub fn helper() {}\n").unwrap();
5241        let config = source_config(root.clone(), Language::Rust);
5242        let db = IndexDatabase::rebuild(&config).unwrap();
5243        db.storage
5244            .connection()
5245            .execute("UPDATE index_meta SET value = 'old' WHERE key = 'graph_index_version'", [])
5246            .unwrap();
5247        db.storage
5248            .connection()
5249            .execute(
5250                "
5251                UPDATE edges
5252                SET edge_kind = 'calls_name',
5253                    to_symbol_id = (SELECT id FROM symbols WHERE name = 'format' LIMIT 1),
5254                    confidence = 'Syntactic',
5255                    evidence = NULL,
5256                    resolution = 'syntactic'
5257                WHERE to_name = 'format'
5258                ",
5259                [],
5260            )
5261            .unwrap();
5262        drop(db);
5263
5264        let reopened = IndexDatabase::open(&config.database).unwrap();
5265        let edge = reopened
5266            .storage
5267            .connection()
5268            .query_row(
5269                "
5270                SELECT edge_kind, to_symbol_id, confidence, resolution, evidence
5271                FROM edges
5272                WHERE to_name = 'format'
5273                  AND edge_kind = 'uses_macro'
5274                ",
5275                [],
5276                |row| {
5277                    Ok((
5278                        row.get::<_, String>(0)?,
5279                        row.get::<_, Option<i64>>(1)?,
5280                        row.get::<_, String>(2)?,
5281                        row.get::<_, String>(3)?,
5282                        row.get::<_, Option<String>>(4)?,
5283                    ))
5284                },
5285            )
5286            .unwrap();
5287        assert_eq!(edge.0, "uses_macro");
5288        assert_eq!(edge.1, None);
5289        assert_eq!(edge.2, "NameOnly");
5290        assert_eq!(edge.3, "unresolved");
5291        assert!(edge.4.as_deref().is_some_and(|value| value.contains("format!")));
5292
5293        fs::remove_dir_all(root).unwrap();
5294    }
5295
5296    #[test]
5297    fn qualified_common_member_calls_do_not_resolve_by_short_name() {
5298        let root = unique_temp_root();
5299        let _ = fs::remove_dir_all(&root);
5300        fs::create_dir_all(root.join("src")).unwrap();
5301        fs::write(
5302            root.join("src/lib.rs"),
5303            r#"
5304pub struct AlertsStore;
5305
5306impl AlertsStore {
5307    pub fn new() -> Self {
5308        Self
5309    }
5310}
5311
5312pub fn caller() {
5313    let _items: Vec<String> = Vec::new();
5314}
5315"#,
5316        )
5317        .unwrap();
5318        let config = source_config(root.clone(), Language::Rust);
5319        let db = IndexDatabase::rebuild(&config).unwrap();
5320
5321        let edge = db
5322            .storage
5323            .connection()
5324            .query_row(
5325                "
5326                SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution
5327                FROM edges
5328                WHERE from_name LIKE '%caller'
5329                  AND edge_kind = 'calls_name'
5330                  AND to_name = 'new'
5331                ",
5332                [],
5333                |row| {
5334                    Ok((
5335                        row.get::<_, String>(0)?,
5336                        row.get::<_, Option<String>>(1)?,
5337                        row.get::<_, Option<i64>>(2)?,
5338                        row.get::<_, String>(3)?,
5339                        row.get::<_, String>(4)?,
5340                    ))
5341                },
5342            )
5343            .unwrap();
5344        assert_eq!(edge.0, "new");
5345        assert_eq!(edge.1.as_deref(), Some("Vec::new"));
5346        assert_eq!(edge.2, None);
5347        assert_eq!(edge.3, "NameOnly");
5348        assert_eq!(edge.4, "unresolved");
5349
5350        fs::remove_dir_all(root).unwrap();
5351    }
5352
5353    #[test]
5354    fn macro_edges_do_not_resolve_to_same_named_typescript_symbols() {
5355        let root = unique_temp_root();
5356        let _ = fs::remove_dir_all(&root);
5357        fs::create_dir_all(root.join("src")).unwrap();
5358        fs::write(
5359            root.join("src/lib.rs"),
5360            r#"
5361fn rust_entry() {
5362    let _payload = json!({"ok": true});
5363}
5364"#,
5365        )
5366        .unwrap();
5367        fs::write(root.join("src/preferences.ts"), "export function json() { return {}; }\n")
5368            .unwrap();
5369        let mut config = source_config(root.clone(), Language::Rust);
5370        config.targets.push(ResolvedTarget {
5371            name: "typescript".to_string(),
5372            language: Language::TypeScript,
5373            directories: vec![PathBuf::from("src")],
5374            include: vec!["**/*.ts".to_string()],
5375            exclude: Vec::new(),
5376            kind: TargetKind::Source,
5377        });
5378        let db = IndexDatabase::rebuild(&config).unwrap();
5379
5380        let edge = db
5381            .storage
5382            .connection()
5383            .query_row(
5384                "
5385                SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
5386                FROM edges
5387                WHERE edge_kind = 'uses_macro'
5388                  AND to_name = 'json'
5389                ",
5390                [],
5391                |row| {
5392                    Ok((
5393                        row.get::<_, String>(0)?,
5394                        row.get::<_, String>(1)?,
5395                        row.get::<_, Option<i64>>(2)?,
5396                        row.get::<_, String>(3)?,
5397                        row.get::<_, String>(4)?,
5398                        row.get::<_, Option<String>>(5)?,
5399                    ))
5400                },
5401            )
5402            .unwrap();
5403        assert_eq!(edge.0, "uses_macro");
5404        assert_eq!(edge.1, "json");
5405        assert_eq!(edge.2, None);
5406        assert_eq!(edge.3, "NameOnly");
5407        assert_eq!(edge.4, "unresolved");
5408        assert!(edge.5.as_deref().is_some_and(|value| value.contains("json!")));
5409
5410        fs::remove_dir_all(root).unwrap();
5411    }
5412
5413    #[test]
5414    fn qualified_crate_helper_callers_use_name_fallback() {
5415        let root = unique_temp_root();
5416        let _ = fs::remove_dir_all(&root);
5417        fs::create_dir_all(root.join("src")).unwrap();
5418        fs::write(
5419            root.join("src/lib.rs"),
5420            r#"
5421pub mod task_spawn {
5422    pub fn spawn_blocking() {}
5423}
5424
5425pub fn first() {
5426    crate::task_spawn::spawn_blocking();
5427}
5428
5429pub fn second() {
5430    task_spawn::spawn_blocking();
5431}
5432"#,
5433        )
5434        .unwrap();
5435        let config = source_config(root.clone(), Language::Rust);
5436        let db = IndexDatabase::rebuild(&config).unwrap();
5437
5438        let callers = db.find_callers("spawn_blocking", 10).unwrap();
5439        assert!(
5440            callers.iter().any(|edge| {
5441                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("first"))
5442                    && edge.edge_kind == "calls_name"
5443                    && edge.resolution == "target_name_fallback"
5444            }),
5445            "spawn_blocking callers: {callers:?}"
5446        );
5447        assert!(
5448            callers.iter().any(|edge| {
5449                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("second"))
5450                    && edge.edge_kind == "calls_name"
5451            }),
5452            "spawn_blocking callers: {callers:?}"
5453        );
5454
5455        fs::remove_dir_all(root).unwrap();
5456    }
5457
5458    #[test]
5459    fn caller_lookup_does_not_match_related_names_or_chain_evidence() {
5460        let root = unique_temp_root();
5461        let _ = fs::remove_dir_all(&root);
5462        fs::create_dir_all(root.join("src")).unwrap();
5463        fs::write(
5464            root.join("src/lib.rs"),
5465            r#"
5466pub mod runtime {
5467    pub mod task_spawn {
5468        pub fn spawn() {}
5469        pub fn spawn_blocking() -> JoinHandle {
5470            JoinHandle
5471        }
5472        pub fn spawn_blocking_handle() {}
5473        pub fn spawn_blocking_offload() -> JoinHandle {
5474            JoinHandle
5475        }
5476    }
5477}
5478
5479pub struct JoinHandle;
5480
5481impl JoinHandle {
5482    pub fn map_err(self) {}
5483}
5484
5485pub fn direct() {
5486    crate::runtime::task_spawn::spawn_blocking();
5487}
5488
5489pub fn related_handle() {
5490    crate::runtime::task_spawn::spawn_blocking_handle();
5491}
5492
5493pub fn related_offload_chain() {
5494    crate::runtime::task_spawn::spawn_blocking_offload().map_err();
5495}
5496
5497pub fn related_spawn_with_text() {
5498    crate::runtime::task_spawn::spawn();
5499}
5500"#,
5501        )
5502        .unwrap();
5503        let config = source_config(root.clone(), Language::Rust);
5504        let db = IndexDatabase::rebuild(&config).unwrap();
5505
5506        let callers = db.find_callers("spawn_blocking", 20).unwrap();
5507        assert!(
5508            callers.iter().any(|edge| {
5509                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
5510                    && edge.target.as_deref() == Some("spawn_blocking")
5511                    && edge.edge_kind == "calls_name"
5512            }),
5513            "spawn_blocking callers: {callers:?}"
5514        );
5515        assert!(
5516            callers.iter().all(|edge| {
5517                !edge.from_symbol.as_deref().is_some_and(|name| {
5518                    name.ends_with("related_handle")
5519                        || name.ends_with("related_offload_chain")
5520                        || name.ends_with("related_spawn_with_text")
5521                }) && !matches!(
5522                    edge.target.as_deref(),
5523                    Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
5524                )
5525            }),
5526            "caller lookup leaked related names or chain evidence: {callers:?}"
5527        );
5528
5529        let qualified_callers = db.find_callers("src/lib.rs::spawn_blocking", 20).unwrap();
5530        assert!(
5531            qualified_callers.iter().any(|edge| {
5532                edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
5533                    && edge.target.as_deref() == Some("spawn_blocking")
5534                    && edge.edge_kind == "calls_name"
5535            }),
5536            "qualified spawn_blocking callers: {qualified_callers:?}"
5537        );
5538        assert!(
5539            qualified_callers.iter().all(|edge| {
5540                !edge.from_symbol.as_deref().is_some_and(|name| {
5541                    name.ends_with("related_handle")
5542                        || name.ends_with("related_offload_chain")
5543                        || name.ends_with("related_spawn_with_text")
5544                }) && !matches!(
5545                    edge.target.as_deref(),
5546                    Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
5547                )
5548            }),
5549            "qualified caller lookup leaked related names or chain evidence: {qualified_callers:?}"
5550        );
5551
5552        fs::remove_dir_all(root).unwrap();
5553    }
5554
5555    #[test]
5556    fn files_past_the_old_structural_cap_still_contribute_symbols_and_edges() {
5557        let root = unique_temp_root();
5558        let _ = fs::remove_dir_all(&root);
5559        fs::create_dir_all(root.join("src")).unwrap();
5560        let filler =
5561            (0..700).map(|idx| format!("pub fn filler_{idx}() {{}}\n")).collect::<String>();
5562        fs::write(
5563            root.join("src/lib.rs"),
5564            format!(
5565                r#"
5566pub mod task_spawn {{
5567    pub fn spawn_blocking() {{}}
5568}}
5569
5570{filler}
5571
5572pub fn caller() {{
5573    crate::task_spawn::spawn_blocking();
5574}}
5575"#
5576            ),
5577        )
5578        .unwrap();
5579        let config = source_config(root.clone(), Language::Rust);
5580        assert!(fs::metadata(root.join("src/lib.rs")).unwrap().len() > 10_000);
5581        let db = IndexDatabase::rebuild(&config).unwrap();
5582
5583        let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
5584        assert!(
5585            symbols.iter().any(|symbol| symbol.name == "caller"),
5586            "caller symbols: {symbols:?}"
5587        );
5588        let callers = db.find_callers("spawn_blocking", 10).unwrap();
5589        assert!(
5590            callers.iter().any(|edge| {
5591                edge.edge_kind == "calls_name"
5592                    && edge.target.as_deref() == Some("spawn_blocking")
5593                    && edge.callsite.as_ref().is_some_and(|callsite| callsite.line > 700)
5594            }),
5595            "spawn_blocking callers: {callers:?}"
5596        );
5597        let impact =
5598            db.impact_surface("callers of crate::task_spawn::spawn_blocking in src", 10).unwrap();
5599        assert!(
5600            impact.iter().any(|item| {
5601                item.category == "Direct structural impact" && item.reason == "direct_caller"
5602            }),
5603            "impact: {impact:?}"
5604        );
5605
5606        fs::remove_dir_all(root).unwrap();
5607    }
5608
5609    #[test]
5610    fn impact_surface_uses_high_signal_query_symbols_and_call_edges() {
5611        let root = unique_temp_root();
5612        let _ = fs::remove_dir_all(&root);
5613        fs::create_dir_all(root.join("src")).unwrap();
5614        fs::write(
5615            root.join("src/lib.rs"),
5616            r#"
5617pub mod runtime {
5618    pub fn unrelated_runtime_symbol() {}
5619}
5620
5621pub mod task_spawn {
5622    pub fn spawn_blocking<F, T>(f: F) -> T
5623    where
5624        F: FnOnce() -> T + Send + 'static,
5625        T: Send + 'static,
5626    {
5627        f()
5628    }
5629}
5630
5631pub fn caller() {
5632    crate::task_spawn::spawn_blocking(|| 1);
5633}
5634"#,
5635        )
5636        .unwrap();
5637        let config = source_config(root.clone(), Language::Rust);
5638        let db = IndexDatabase::rebuild(&config).unwrap();
5639        let impact = db
5640            .impact_surface(
5641                "change runtime task_spawn spawn_blocking wasm inline native blocking pool",
5642                20,
5643            )
5644            .unwrap();
5645        assert!(
5646            impact.iter().any(|item| {
5647                item.category == "Direct structural impact"
5648                    && item.reason == "direct_caller"
5649                    && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
5650            }),
5651            "spawn_blocking caller should be present: {impact:?}"
5652        );
5653        assert!(
5654            impact.iter().all(|item| {
5655                !(item.reason == "exact_symbol_definition"
5656                    && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("runtime")))
5657            }),
5658            "broad `runtime` token should not become an exact impact seed: {impact:?}"
5659        );
5660        assert!(
5661            impact.iter().all(|item| {
5662                !item.evidence.iter().any(|evidence| evidence.contains("references_type"))
5663                    && item.symbol.as_deref() != Some("Send")
5664            }),
5665            "type references should not appear as direct impact: {impact:?}"
5666        );
5667
5668        fs::remove_dir_all(root).unwrap();
5669    }
5670
5671    #[test]
5672    fn docs_for_symbol_prefers_local_source_context_before_broad_markdown() {
5673        let root = unique_temp_root();
5674        let _ = fs::remove_dir_all(&root);
5675        fs::create_dir_all(root.join("src/runtime")).unwrap();
5676        fs::create_dir_all(root.join("docs")).unwrap();
5677        fs::write(
5678            root.join("src/runtime/task_spawn.rs"),
5679            r#"
5680pub fn spawn_blocking<F, T>(f: F) -> T
5681where
5682    F: FnOnce() -> T + Send + 'static,
5683    T: Send + 'static,
5684{
5685    f()
5686}
5687"#,
5688        )
5689        .unwrap();
5690        fs::write(
5691            root.join("docs/phrase-persistence.md"),
5692            "# Phrase persistence\nUnrelated notes mention spawn_blocking in passing.\n",
5693        )
5694        .unwrap();
5695        fs::write(
5696            root.join("docs/task_spawn.md"),
5697            "# task_spawn\nLocal task_spawn notes explain spawn_blocking.\n",
5698        )
5699        .unwrap();
5700        let config = Config {
5701            root: root.clone(),
5702            database: root.join(".rag-rat/index.sqlite"),
5703            targets: vec![
5704                ResolvedTarget {
5705                    name: "rust".to_string(),
5706                    language: Language::Rust,
5707                    directories: vec![PathBuf::from("src")],
5708                    include: vec!["src/".to_string()],
5709                    exclude: Vec::new(),
5710                    kind: TargetKind::Source,
5711                },
5712                ResolvedTarget {
5713                    name: "markdown".to_string(),
5714                    language: Language::Markdown,
5715                    directories: vec![PathBuf::from("docs")],
5716                    include: vec!["**/*.md".to_string()],
5717                    exclude: Vec::new(),
5718                    kind: TargetKind::Docs,
5719                },
5720            ],
5721            local_ai: Default::default(),
5722        };
5723        let db = IndexDatabase::rebuild(&config).unwrap();
5724        let symbol = db.symbols("spawn_blocking", Some(Language::Rust), 10).unwrap().remove(0);
5725        let hits = db.docs_for_selected_symbol(&symbol, 10).unwrap();
5726        assert_eq!(hits[0].path, "src/runtime/task_spawn.rs", "docs hits: {hits:?}");
5727        let phrase_index = hits.iter().position(|hit| hit.path == "docs/phrase-persistence.md");
5728        let task_spawn_index = hits.iter().position(|hit| hit.path == "docs/task_spawn.md");
5729        assert!(
5730            phrase_index.is_none_or(|phrase| task_spawn_index.is_some_and(|local| local < phrase)),
5731            "path-local task_spawn docs should outrank unrelated phrase docs: {hits:?}"
5732        );
5733
5734        fs::remove_dir_all(root).unwrap();
5735    }
5736
5737    #[test]
5738    fn partial_tree_sitter_trees_still_contribute_valid_symbols_and_edges() {
5739        let root = unique_temp_root();
5740        let _ = fs::remove_dir_all(&root);
5741        fs::create_dir_all(root.join("src")).unwrap();
5742        fs::write(
5743            root.join("src/lib.rs"),
5744            r#"
5745pub fn helper() {}
5746
5747pub fn caller() {
5748    helper();
5749}
5750
5751fn broken( {
5752"#,
5753        )
5754        .unwrap();
5755        let config = source_config(root.clone(), Language::Rust);
5756        let db = IndexDatabase::rebuild(&config).unwrap();
5757
5758        let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
5759        assert!(
5760            symbols.iter().any(|symbol| symbol.name == "caller"),
5761            "caller symbols: {symbols:?}"
5762        );
5763        assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
5764
5765        fs::remove_dir_all(root).unwrap();
5766    }
5767
5768    #[test]
5769    fn receiver_method_calls_do_not_bind_to_same_named_free_functions() {
5770        let root = unique_temp_root();
5771        let _ = fs::remove_dir_all(&root);
5772        fs::create_dir_all(root.join("src")).unwrap();
5773        fs::write(
5774            root.join("src/lib.rs"),
5775            r#"
5776pub fn spawn_blocking() {}
5777
5778pub fn caller(joinset: JoinSet) {
5779    joinset.spawn_blocking();
5780}
5781
5782pub struct JoinSet;
5783"#,
5784        )
5785        .unwrap();
5786        let config = source_config(root.clone(), Language::Rust);
5787        let db = IndexDatabase::rebuild(&config).unwrap();
5788
5789        let edge = db
5790            .storage
5791            .connection()
5792            .query_row(
5793                "
5794                SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution, receiver_hint
5795                FROM edges
5796                WHERE from_name LIKE '%caller'
5797                  AND edge_kind = 'calls_name'
5798                  AND to_name = 'spawn_blocking'
5799                ",
5800                [],
5801                |row| {
5802                    Ok((
5803                        row.get::<_, String>(0)?,
5804                        row.get::<_, Option<String>>(1)?,
5805                        row.get::<_, Option<i64>>(2)?,
5806                        row.get::<_, String>(3)?,
5807                        row.get::<_, String>(4)?,
5808                        row.get::<_, Option<String>>(5)?,
5809                    ))
5810                },
5811            )
5812            .unwrap();
5813        assert_eq!(edge.0, "spawn_blocking");
5814        assert_eq!(edge.1.as_deref(), Some("joinset::spawn_blocking"));
5815        assert_eq!(edge.2, None);
5816        assert_eq!(edge.3, "NameOnly");
5817        assert_eq!(edge.4, "unresolved");
5818        assert_eq!(edge.5.as_deref(), Some("joinset"));
5819
5820        fs::remove_dir_all(root).unwrap();
5821    }
5822
5823    #[test]
5824    fn trace_callees_excludes_type_references_by_default() {
5825        let root = unique_temp_root();
5826        let _ = fs::remove_dir_all(&root);
5827        fs::create_dir_all(root.join("src")).unwrap();
5828        fs::write(
5829            root.join("src/lib.rs"),
5830            r#"
5831pub struct JoinError;
5832pub enum Result<T, E> { Ok(T), Err(E) }
5833pub fn helper() {}
5834
5835pub fn spawn_blocking<F, T>(f: F) -> Result<T, JoinError>
5836where
5837    F: FnOnce() -> T + Send + 'static,
5838    T: Send + 'static,
5839{
5840    helper();
5841    tokio::task::spawn_blocking(f)
5842}
5843"#,
5844        )
5845        .unwrap();
5846        let config = source_config(root.clone(), Language::Rust);
5847        let db = IndexDatabase::rebuild(&config).unwrap();
5848
5849        let default_callees = db.trace_callees("spawn_blocking", 20).unwrap();
5850        assert!(
5851            default_callees.iter().any(|edge| {
5852                edge.edge_kind == "calls_name"
5853                    && edge.target.as_deref() == Some("helper")
5854                    && edge.verified_target_symbol
5855            }),
5856            "default callees: {default_callees:?}"
5857        );
5858        assert!(
5859            default_callees
5860                .iter()
5861                .all(|edge| edge.target_qualified_name.as_deref()
5862                    != Some("tokio::task::spawn_blocking")),
5863            "default callees leaked unresolved external call: {default_callees:?}"
5864        );
5865        assert!(
5866            default_callees.iter().all(|edge| edge.edge_kind != "references_type"),
5867            "default callees leaked type refs: {default_callees:?}"
5868        );
5869        assert!(
5870            default_callees.iter().all(|edge| !matches!(
5871                edge.target.as_deref(),
5872                Some("F" | "T" | "Send" | "Result" | "JoinError")
5873            )),
5874            "default callees leaked generic/type targets: {default_callees:?}"
5875        );
5876
5877        let with_refs = db
5878            .trace_callees_with_options(
5879                "spawn_blocking",
5880                20,
5881                &crate::query::graph::GraphTraversalOptions {
5882                    include_references: true,
5883                    edge_kinds: None,
5884                    ..Default::default()
5885                },
5886            )
5887            .unwrap();
5888        assert!(
5889            with_refs.iter().any(|edge| edge.edge_kind == "references_type"),
5890            "reference-enabled callees: {with_refs:?}"
5891        );
5892
5893        let with_unresolved = db
5894            .trace_callees_with_options(
5895                "spawn_blocking",
5896                20,
5897                &crate::query::graph::GraphTraversalOptions {
5898                    include_unresolved: true,
5899                    ..Default::default()
5900                },
5901            )
5902            .unwrap();
5903        assert!(
5904            with_unresolved
5905                .iter()
5906                .any(|edge| edge.target_qualified_name.as_deref()
5907                    == Some("tokio::task::spawn_blocking")),
5908            "unresolved-enabled callees: {with_unresolved:?}"
5909        );
5910
5911        fs::remove_dir_all(root).unwrap();
5912    }
5913
5914    #[test]
5915    fn trace_callees_defaults_to_repo_relevant_calls() {
5916        let root = unique_temp_root();
5917        let _ = fs::remove_dir_all(&root);
5918        fs::create_dir_all(root.join("src")).unwrap();
5919        fs::write(
5920            root.join("src/lib.rs"),
5921            r#"
5922pub fn repo_helper() {}
5923
5924pub fn caller(input: Result<String, String>) -> String {
5925    repo_helper();
5926    let values: Vec<String> = Vec::new();
5927    let _ = input.map_err(|error| error.to_string());
5928    let _ = Some("value").unwrap_or_else(|| "fallback");
5929    let _ = format!("hello");
5930    values.get(0).unwrap_or_else(|| "fallback").to_string()
5931}
5932"#,
5933        )
5934        .unwrap();
5935        let config = source_config(root.clone(), Language::Rust);
5936        let db = IndexDatabase::rebuild(&config).unwrap();
5937
5938        let default_callees = db.trace_callees("caller", 20).unwrap();
5939        assert!(
5940            default_callees.iter().any(|edge| edge.target.as_deref() == Some("repo_helper")),
5941            "default callees should keep repo-local calls: {default_callees:?}"
5942        );
5943        assert!(
5944            default_callees.iter().all(|edge| {
5945                edge.edge_kind != "uses_macro"
5946                    && !matches!(
5947                        edge.target.as_deref(),
5948                        Some("new" | "map_err" | "unwrap_or_else" | "to_string" | "format")
5949                    )
5950            }),
5951            "default callees leaked low-signal calls: {default_callees:?}"
5952        );
5953
5954        let expanded = db
5955            .trace_callees_with_options(
5956                "caller",
5957                20,
5958                &crate::query::graph::GraphTraversalOptions {
5959                    include_unresolved: true,
5960                    include_macros: true,
5961                    include_common_methods: true,
5962                    ..Default::default()
5963                },
5964            )
5965            .unwrap();
5966        assert!(
5967            expanded.iter().any(|edge| edge.edge_kind == "uses_macro"),
5968            "macro-enabled callees: {expanded:?}"
5969        );
5970        assert!(
5971            expanded.iter().any(|edge| edge.target.as_deref() == Some("unwrap_or_else")),
5972            "common-method-enabled callees: {expanded:?}"
5973        );
5974
5975        fs::remove_dir_all(root).unwrap();
5976    }
5977
5978    #[test]
5979    fn indexes_kotlin_graph_edges_from_tree_sitter() {
5980        let root = unique_temp_root();
5981        let _ = fs::remove_dir_all(&root);
5982        fs::create_dir_all(root.join("src")).unwrap();
5983        fs::write(
5984            root.join("src/Main.kt"),
5985            r#"
5986package dev.cq27.test
5987
5988import dev.cq27.lib.ExternalThing
5989
5990interface Syncable
5991
5992class MainBridge : Syncable {
5993  suspend fun syncOnce() {
5994    helper()
5995    ExternalThing()
5996  }
5997}
5998
5999fun helper() {}
6000"#,
6001        )
6002        .unwrap();
6003        let config = source_config(root.clone(), Language::Kotlin);
6004        let db = IndexDatabase::rebuild(&config).unwrap();
6005
6006        assert_edge(&db, "syncOnce", "helper", "calls_name", "Syntactic");
6007        assert_edge(&db, "MainBridge", "Syncable", "implements", "Syntactic");
6008        assert_edge(&db, "src/Main.kt", "ExternalThing", "imports", "NameOnly");
6009        let impact = db.impact_surface("helper", 10).unwrap();
6010        assert!(
6011            impact.iter().any(|item| {
6012                item.category == "Direct structural impact" && item.reason == "direct_caller"
6013            }),
6014            "impact: {impact:?}"
6015        );
6016
6017        fs::remove_dir_all(root).unwrap();
6018    }
6019
6020    #[test]
6021    fn indexes_real_world_kotlin_graph_patterns() {
6022        let root = fixture_temp_root("graph-realworld/kotlin");
6023        let config = source_config(root.clone(), Language::Kotlin);
6024        let db = IndexDatabase::rebuild(&config).unwrap();
6025
6026        assert_edge(&db, "src/Main.kt", "ExternalFactory", "imports", "NameOnly");
6027        assert_edge(&db, "Worker", "companion", "contains", "Exact");
6028        assert_edge(&db, "companion", "create", "contains", "Exact");
6029        assert_edge(&db, "syncOnce", "create", "calls_name", "Syntactic");
6030        assert_edge(&db, "syncOnce", "Worker", "references_type", "Syntactic");
6031        assert_edge(&db, "syncOnce", "run", "calls_name", "Syntactic");
6032        assert_edge(&db, "syncOnce", "SingletonRunner", "references_type", "Syntactic");
6033        assert_edge(&db, "syncOnce", "ExternalFactory", "calls_name", "NameOnly");
6034        assert_edge(&db, "syncOnce", "ExternalFactory", "references_type", "NameOnly");
6035        assert_edge(&db, "syncOnce", "cleaned", "calls_name", "Syntactic");
6036        let callers = db.find_callers("cleaned", 10).unwrap();
6037        assert!(
6038            callers.iter().any(|edge| {
6039                edge.edge_kind == "calls_name"
6040                    && edge.edge_confidence == edge.confidence
6041                    && edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("syncOnce"))
6042            }),
6043            "cleaned callers: {callers:?}"
6044        );
6045
6046        fs::remove_dir_all(root).unwrap();
6047    }
6048
6049    #[test]
6050    fn kotlin_caller_lookup_respects_qualified_receivers_for_common_method_names() {
6051        let root = unique_temp_root();
6052        let _ = fs::remove_dir_all(&root);
6053        fs::create_dir_all(root.join("src")).unwrap();
6054        fs::write(
6055            root.join("src/Main.kt"),
6056            r#"
6057package dev.cq27.test
6058
6059object WatchProposalBuilder {
6060  fun build(): String = "proposal"
6061}
6062
6063class AndroidDialogBuilder {
6064  fun build(): String = "dialog"
6065}
6066
6067fun actualCaller() {
6068  WatchProposalBuilder.build()
6069}
6070
6071fun unrelatedBuilderCalls(dialog: AndroidDialogBuilder) {
6072  dialog.build()
6073  AndroidDialogBuilder().build()
6074}
6075"#,
6076        )
6077        .unwrap();
6078        let config = source_config(root.clone(), Language::Kotlin);
6079        let db = IndexDatabase::rebuild(&config).unwrap();
6080        let target = db
6081            .symbols("build", Some(Language::Kotlin), 10)
6082            .unwrap()
6083            .into_iter()
6084            .find(|symbol| symbol.qualified_name.contains("WatchProposalBuilder"))
6085            .expect("WatchProposalBuilder.build symbol");
6086        let callers = db
6087            .find_callers_with_options(
6088                "build",
6089                20,
6090                &crate::query::graph::GraphTraversalOptions {
6091                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
6092                    symbol_id: Some(target.symbol_id),
6093                    ..Default::default()
6094                },
6095            )
6096            .unwrap();
6097        assert_eq!(
6098            callers
6099                .iter()
6100                .filter(|edge| edge
6101                    .from_symbol
6102                    .as_deref()
6103                    .is_some_and(|name| name.ends_with("actualCaller")))
6104                .count(),
6105            1,
6106            "actual caller should be present once: {callers:?}"
6107        );
6108        assert!(
6109            callers.iter().all(|edge| edge
6110                .from_symbol
6111                .as_deref()
6112                .is_none_or(|name| !name.ends_with("unrelatedBuilderCalls"))),
6113            "unrelated builder calls should not resolve to WatchProposalBuilder.build: {callers:?}"
6114        );
6115
6116        fs::remove_dir_all(root).unwrap();
6117    }
6118
6119    #[test]
6120    fn github_sync_caches_papertrail_and_rationale_without_query_time_crawling() {
6121        let (root, config) =
6122            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
6123        let db = IndexDatabase::rebuild(&config).unwrap();
6124        let mock = MockGitHubClient;
6125
6126        let offline =
6127            github::sync_from_refs::<MockGitHubClient>(db.storage.connection(), &root, None, true)
6128                .unwrap();
6129        assert!(offline.offline);
6130        assert_eq!(offline.discovered_refs, 1);
6131        assert_eq!(offline.synced_items, 0);
6132
6133        let report =
6134            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6135        assert!(!report.offline);
6136        assert_eq!(report.discovered_refs, 1);
6137        assert_eq!(report.synced_items, 5);
6138        assert_eq!(report.status.issues, 1);
6139        assert_eq!(report.status.comments, 1);
6140        assert_eq!(report.status.pulls, 1);
6141        assert_eq!(report.status.reviews, 1);
6142        assert_eq!(report.status.review_comments, 1);
6143
6144        let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
6145        assert_eq!(issue_hits.len(), 1);
6146        assert_eq!(issue_hits[0].classification, "decision");
6147        assert_eq!(issue_hits[0].evidence_kind, "historical_github");
6148
6149        let refs = db.github_refs_for_path("docs/search.md", 10).unwrap();
6150        assert_eq!(refs.len(), 1);
6151        assert_eq!(refs[0].source_kind, "file");
6152
6153        let rationale = db.rationale_search("risk", 10).unwrap();
6154        assert!(rationale.iter().any(|item| item.classification == "risk"));
6155        let issue_ref_rationale = db.rationale_search("Fixes #42", 10).unwrap();
6156        assert_eq!(issue_ref_rationale.first().map(|item| item.number), Some(42));
6157        assert_eq!(
6158            issue_ref_rationale.first().map(|item| item.evidence_kind),
6159            Some("literal_github_ref")
6160        );
6161        assert_eq!(issue_ref_rationale.first().map(|item| item.score), Some(1.0));
6162        assert!(
6163            issue_ref_rationale.iter().any(|item| item.number == 42),
6164            "issue ref rationale should use structured GitHub refs: {issue_ref_rationale:?}"
6165        );
6166
6167        let chunk_id = first_chunk_id(&db);
6168        let papertrail = db.papertrail_for_chunk(chunk_id, 10).unwrap().unwrap();
6169        assert!(papertrail.current_source.is_some());
6170        assert!(!papertrail.github_evidence.is_empty());
6171        assert!(papertrail.github_evidence.iter().all(|item| {
6172            matches!(item.evidence_kind, "historical_github" | "literal_github_ref")
6173        }));
6174
6175        fs::remove_dir_all(root).unwrap();
6176    }
6177
6178    #[test]
6179    fn papertrail_for_commit_prefers_commit_sourced_github_refs() {
6180        let root = unique_temp_root();
6181        let _ = fs::remove_dir_all(&root);
6182        fs::create_dir_all(root.join("docs")).unwrap();
6183        run_git(&root, &["init"]);
6184        run_git(&root, &["config", "user.name", "Rag Rat"]);
6185        run_git(&root, &["config", "user.email", "rag@example.com"]);
6186        fs::write(root.join("docs/search.md"), "# Decision\nalpha\n").unwrap();
6187        run_git(&root, &["add", "."]);
6188        run_git(&root, &["commit", "-m", "Fix search rationale", "-m", "Fixes #42"]);
6189
6190        let config = markdown_config_for_root(root.clone());
6191        let db = IndexDatabase::rebuild(&config).unwrap();
6192        let commit = db
6193            .storage
6194            .connection()
6195            .query_row("SELECT hash FROM git_commits LIMIT 1", [], |row| row.get::<_, String>(0))
6196            .unwrap();
6197        let mock = MockGitHubClient;
6198        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6199
6200        let papertrail = db.papertrail_for_commit(&commit[..7], 10).unwrap();
6201        assert_eq!(papertrail.github_evidence.first().map(|item| item.number), Some(42));
6202        assert_eq!(
6203            papertrail.github_evidence.first().map(|item| item.evidence_kind),
6204            Some("literal_github_ref")
6205        );
6206        assert!(
6207            papertrail.fallback_github_evidence.is_empty(),
6208            "structured commit refs should suppress noisy fallback evidence: {papertrail:?}"
6209        );
6210
6211        fs::remove_dir_all(root).unwrap();
6212    }
6213
6214    #[test]
6215    fn papertrail_for_symbol_dedupes_duplicate_file_refs() {
6216        let root = unique_temp_root();
6217        let _ = fs::remove_dir_all(&root);
6218        fs::create_dir_all(root.join("src")).unwrap();
6219        fs::write(
6220            root.join("src/lib.rs"),
6221            "// First rationale (#42)\n// Second rationale (#42)\npub fn tracked_symbol() {}\n",
6222        )
6223        .unwrap();
6224        let config = source_config(root.clone(), Language::Rust);
6225        let db = IndexDatabase::rebuild(&config).unwrap();
6226        let mock = MockGitHubClient;
6227        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6228        let papertrail = db
6229            .papertrail_for_symbol("tracked_symbol", Some(Language::Rust), 10)
6230            .unwrap()
6231            .expect("tracked symbol papertrail");
6232
6233        assert_eq!(
6234            papertrail
6235                .github_evidence
6236                .iter()
6237                .filter(|item| item.number == 42 && item.item_kind == "issue")
6238                .count(),
6239            1,
6240            "duplicate #42 refs in one file should collapse to one issue evidence row: {papertrail:?}"
6241        );
6242
6243        fs::remove_dir_all(root).unwrap();
6244    }
6245
6246    #[test]
6247    fn github_sync_keeps_partial_cache_and_skips_synced_refs_after_404() {
6248        let (root, config) = markdown_config(
6249            "# Decision\nRefs cq27-dev/rag-rat#42 and cq27-dev/rag-rat#404\nwe will keep sqlite\n",
6250        );
6251        let db = IndexDatabase::rebuild(&config).unwrap();
6252        let mock = PartiallyFailingGitHubClient;
6253
6254        let report =
6255            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6256        assert_eq!(report.discovered_refs, 2);
6257        assert_eq!(report.synced_items, 5);
6258        assert_eq!(report.failed_refs, 1);
6259        assert_eq!(report.errors.len(), 1);
6260        assert_eq!(report.errors[0].number, 404);
6261        assert_eq!(report.errors[0].status, "not_found");
6262
6263        let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
6264        assert_eq!(issue_hits.len(), 1);
6265        assert_eq!(issue_hits[0].number, 42);
6266
6267        let second =
6268            github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6269        assert_eq!(second.synced_items, 0);
6270        assert_eq!(second.skipped_refs, 2);
6271        assert_eq!(second.failed_refs, 0);
6272
6273        fs::remove_dir_all(root).unwrap();
6274    }
6275
6276    #[test]
6277    fn search_recovers_when_fts_is_marked_dirty() {
6278        let (root, config) = markdown_config("alpha token");
6279        let db = IndexDatabase::rebuild(&config).unwrap();
6280        db.mark_fts_dirty().unwrap();
6281
6282        let dirty = db.status(&config.database).unwrap();
6283        assert!(dirty.fts_dirty);
6284        assert!(!dirty.fts_fresh);
6285
6286        let hits = db.search("alpha", 10, false).unwrap();
6287        assert_eq!(hits.len(), 1);
6288        assert_eq!(hits[0].summary, "alpha token");
6289        let fresh = db.status(&config.database).unwrap();
6290        assert!(!fresh.fts_dirty);
6291        assert!(fresh.fts_fresh);
6292
6293        fs::remove_dir_all(root).unwrap();
6294    }
6295
6296    #[test]
6297    fn read_chunk_relocates_small_line_drift_to_current_text() {
6298        let (root, config) = markdown_config("# Title\nalpha token\n");
6299        let db = IndexDatabase::rebuild(&config).unwrap();
6300        let chunk_id = first_chunk_id(&db);
6301        fs::write(root.join("docs/search.md"), "inserted\n# Title\nalpha token\n").unwrap();
6302
6303        let chunk = db.read_chunk(chunk_id).unwrap().unwrap();
6304        assert_eq!(chunk.start_line, 2);
6305        assert_eq!(chunk.end_line, 3);
6306        assert_eq!(chunk.text, "# Title\nalpha token\n");
6307
6308        fs::remove_dir_all(root).unwrap();
6309    }
6310
6311    #[test]
6312    fn read_chunk_large_drift_reindexes_and_reports_stale_chunk() {
6313        let (root, config) = markdown_config("# Title\nalpha token\n");
6314        let db = IndexDatabase::rebuild(&config).unwrap();
6315        let chunk_id = first_chunk_id(&db);
6316        fs::write(root.join("docs/search.md"), "# Replacement\nbeta token\n").unwrap();
6317
6318        let err = db.read_chunk(chunk_id).unwrap_err().to_string();
6319        assert!(err.contains("StaleChunk"), "{err}");
6320        let hits = db.search("beta", 10, false).unwrap();
6321        assert_eq!(hits.len(), 1);
6322        assert!(db.search("alpha", 10, false).unwrap().is_empty());
6323
6324        fs::remove_dir_all(root).unwrap();
6325    }
6326
6327    #[test]
6328    fn search_retries_after_healing_stale_hit() {
6329        let (root, config) = markdown_config("# Title\nalpha token\n");
6330        let db = IndexDatabase::rebuild(&config).unwrap();
6331        fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
6332
6333        let hits = db.search("alpha", 10, false).unwrap();
6334        assert!(hits.is_empty());
6335        let beta_hits = db.search("beta", 10, false).unwrap();
6336        assert_eq!(beta_hits.len(), 1);
6337        assert!(beta_hits[0].summary.contains("beta"));
6338
6339        fs::remove_dir_all(root).unwrap();
6340    }
6341
6342    #[test]
6343    fn search_heals_relocated_hits_before_returning_line_spans() {
6344        let (root, config) = markdown_config("# Title\nalpha token\n");
6345        let db = IndexDatabase::rebuild(&config).unwrap();
6346        fs::write(root.join("docs/search.md"), "inserted\n# Title\nalpha token\n").unwrap();
6347
6348        let hits = db.search("alpha", 10, false).unwrap();
6349        assert_eq!(hits.len(), 1);
6350        assert_eq!(hits[0].start_line, 2);
6351        assert_eq!(hits[0].end_line, 3);
6352        assert!(hits[0].summary.contains("alpha"));
6353
6354        fs::remove_dir_all(root).unwrap();
6355    }
6356
6357    #[test]
6358    fn read_chunk_deleted_source_reports_gone() {
6359        let (root, config) = markdown_config("# Title\nalpha token\n");
6360        let db = IndexDatabase::rebuild(&config).unwrap();
6361        let chunk_id = first_chunk_id(&db);
6362        fs::remove_file(root.join("docs/search.md")).unwrap();
6363
6364        let err = db.read_chunk(chunk_id).unwrap_err().to_string();
6365        assert!(err.contains("Gone"), "{err}");
6366        assert!(db.search("alpha", 10, false).unwrap().is_empty());
6367
6368        fs::remove_dir_all(root).unwrap();
6369    }
6370
6371    #[test]
6372    fn search_returns_needs_reindex_when_heal_cap_is_exceeded() {
6373        let root = unique_temp_root();
6374        let _ = fs::remove_dir_all(&root);
6375        let docs = root.join("docs");
6376        fs::create_dir_all(&docs).unwrap();
6377        for index in 0..=MAX_AUTO_HEAL_FILES_PER_CALL {
6378            fs::write(docs.join(format!("doc-{index}.md")), "common stale token\n").unwrap();
6379        }
6380        let config = markdown_config_for_root(root.clone());
6381        let db = IndexDatabase::rebuild(&config).unwrap();
6382        for index in 0..=MAX_AUTO_HEAL_FILES_PER_CALL {
6383            fs::write(docs.join(format!("doc-{index}.md")), "fresh replacement token\n").unwrap();
6384        }
6385
6386        let err = db.search("common", 20, false).unwrap_err().to_string();
6387        assert!(err.contains("needs_reindex"), "{err}");
6388
6389        fs::remove_dir_all(root).unwrap();
6390    }
6391
6392    #[test]
6393    fn heal_index_limit_does_not_warn_when_only_fresh_files_are_skipped() {
6394        let root = unique_temp_root();
6395        let _ = fs::remove_dir_all(&root);
6396        let docs = root.join("docs");
6397        fs::create_dir_all(&docs).unwrap();
6398        fs::write(docs.join("one.md"), "one fresh token\n").unwrap();
6399        fs::write(docs.join("two.md"), "two fresh token\n").unwrap();
6400        let config = markdown_config_for_root(root.clone());
6401        let db = IndexDatabase::rebuild(&config).unwrap();
6402
6403        let report = db.heal_index(Some(1)).unwrap();
6404
6405        assert_eq!(report.healed_files, 0);
6406        assert_eq!(report.removed_files, 0);
6407        assert_eq!(report.skipped_files, 2);
6408        assert_eq!(report.message, None);
6409
6410        fs::remove_dir_all(root).unwrap();
6411    }
6412
6413    #[test]
6414    fn search_recovers_when_fts_revision_is_stale() {
6415        let (root, config) = markdown_config("alpha token");
6416        let db = IndexDatabase::rebuild(&config).unwrap();
6417        db.set_meta("fts_source_revision", "stale").unwrap();
6418
6419        let stale = db.status(&config.database).unwrap();
6420        assert!(!stale.fts_dirty);
6421        assert!(!stale.fts_fresh);
6422
6423        let hits = db.search("alpha", 10, false).unwrap();
6424        assert_eq!(hits.len(), 1);
6425        let fresh = db.status(&config.database).unwrap();
6426        assert_eq!(fresh.fts_source_revision.as_deref(), Some(fresh.content_revision.as_str()));
6427        assert!(fresh.fts_fresh);
6428
6429        fs::remove_dir_all(root).unwrap();
6430    }
6431
6432    #[test]
6433    fn parser_failures_report_paths() {
6434        let root = unique_temp_root();
6435        let _ = fs::remove_dir_all(&root);
6436        let src = root.join("src");
6437        fs::create_dir_all(&src).unwrap();
6438        fs::write(src.join("broken.rs"), "pub fn broken(").unwrap();
6439        let config = Config {
6440            root: root.clone(),
6441            database: root.join(".rag-rat/index.sqlite"),
6442            targets: vec![ResolvedTarget {
6443                name: "rust".to_string(),
6444                language: Language::Rust,
6445                directories: vec![PathBuf::from("src")],
6446                include: vec!["**/*.rs".to_string()],
6447                exclude: Vec::new(),
6448                kind: TargetKind::Source,
6449            }],
6450            local_ai: Default::default(),
6451        };
6452
6453        let db = IndexDatabase::rebuild(&config).unwrap();
6454        let status = db.status(&config.database).unwrap();
6455        assert_eq!(status.parser_failures, 1);
6456        assert_eq!(status.parser_failure_paths[0].path, "src/broken.rs");
6457
6458        fs::remove_dir_all(root).unwrap();
6459    }
6460
6461    #[test]
6462    fn repo_memory_bound_to_logical_symbol_surfaces_in_symbol_chunk_and_impact() {
6463        let root = unique_temp_root();
6464        let _ = fs::remove_dir_all(&root);
6465        fs::create_dir_all(root.join("src")).unwrap();
6466        fs::write(
6467            root.join("src/lib.rs"),
6468            "#[cfg(unix)]\npub fn cfg_helper() {}\n#[cfg(windows)]\npub fn cfg_helper() {}\n",
6469        )
6470        .unwrap();
6471        let config = source_config(root.clone(), Language::Rust);
6472        let db = IndexDatabase::rebuild(&config).unwrap();
6473        let symbol = db
6474            .select_symbol(&crate::query::symbol::SymbolSelector {
6475                logical_symbol_id: None,
6476                symbol_id: None,
6477                symbol_path: None,
6478                symbol: Some("cfg_helper".to_string()),
6479                language: Some(Language::Rust),
6480                allow_ambiguous: true,
6481                limit: 10,
6482            })
6483            .unwrap()
6484            .unwrap()
6485            .expect("selected symbol");
6486        let logical_symbol_id = symbol.logical_symbol_id.expect("logical symbol id");
6487
6488        let created = db
6489            .memory_create(crate::query::memory::RepoMemoryCreate {
6490                kind: "Invariant".to_string(),
6491                title: "Treat cfg helper variants as one logical helper".to_string(),
6492                body: "Caller and impact analysis should use the logical symbol, not one cfg body variant."
6493                    .to_string(),
6494                confidence: "high".to_string(),
6495                created_by: Some("test-agent".to_string()),
6496                source: Some("agent".to_string()),
6497                tags: vec!["cfg".to_string(), "graph".to_string()],
6498                bind: crate::query::memory::RepoMemoryBindTarget {
6499                    logical_symbol_id: Some(logical_symbol_id),
6500                    symbol_id: None,
6501                    chunk_id: None,
6502                    edge_id: None,
6503                    path: None,
6504                    start_line: None,
6505                    end_line: None,
6506                    commit_hash: None,
6507                    github_owner: None,
6508                    github_repo: None,
6509                    github_number: None,
6510                    start_logical_symbol_id: None,
6511                    end_logical_symbol_id: None,
6512                    edge_sequence_hash: None,
6513                    path_summary: None,
6514                },
6515            })
6516            .unwrap();
6517        assert!(!created.duplicate);
6518        assert_eq!(created.memory.bindings[0].binding_kind, "logical_symbol");
6519
6520        let memories = db.memory_for_symbol(&symbol, 10).unwrap();
6521        assert_eq!(memories.len(), 1);
6522        assert_eq!(memories[0].kind, "Invariant");
6523        let chunk_id = memories[0].bindings[0].chunk_id.expect("bound chunk");
6524        let chunk = db.read_chunk(chunk_id).unwrap().expect("memory chunk");
6525        assert_eq!(chunk.memories.len(), 1);
6526        assert_eq!(chunk.memories[0].memory_id, created.memory.memory_id);
6527
6528        let impact = db
6529            .impact_surface_report_for_selected_symbol(
6530                &symbol,
6531                10,
6532                &crate::query::impact::ImpactSurfaceOptions::default(),
6533            )
6534            .unwrap();
6535        assert_eq!(impact.repo_memories.direct.len(), 1);
6536        assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);
6537        assert_eq!(impact.completeness_and_caveats.memory_status.stale, 0);
6538
6539        fs::remove_dir_all(root).unwrap();
6540    }
6541
6542    #[test]
6543    fn repo_memory_validate_marks_changed_or_missing_anchors_non_current() {
6544        let root = unique_temp_root();
6545        let _ = fs::remove_dir_all(&root);
6546        fs::create_dir_all(root.join("src")).unwrap();
6547        fs::write(root.join("src/lib.rs"), "pub fn anchored_memory() {}\n").unwrap();
6548        let config = source_config(root.clone(), Language::Rust);
6549        let db = IndexDatabase::rebuild(&config).unwrap();
6550        let symbol = db
6551            .select_symbol(&crate::query::symbol::SymbolSelector {
6552                logical_symbol_id: None,
6553                symbol_id: None,
6554                symbol_path: None,
6555                symbol: Some("anchored_memory".to_string()),
6556                language: Some(Language::Rust),
6557                allow_ambiguous: false,
6558                limit: 10,
6559            })
6560            .unwrap()
6561            .unwrap()
6562            .expect("selected symbol");
6563        let chunk_id = db
6564            .storage
6565            .connection()
6566            .query_row(
6567                "
6568                SELECT chunks.id
6569                FROM chunks
6570                JOIN files ON files.id = chunks.file_id
6571                WHERE files.path = ?1 AND chunks.symbol_path = ?2
6572                LIMIT 1
6573                ",
6574                params![symbol.path, symbol.qualified_name],
6575                |row| row.get::<_, i64>(0),
6576            )
6577            .unwrap();
6578        let created = db
6579            .memory_create(crate::query::memory::RepoMemoryCreate {
6580                kind: "Risk".to_string(),
6581                title: "Anchor must become stale when source hash changes".to_string(),
6582                body: "Validation should separate stale memories from current repo evidence."
6583                    .to_string(),
6584                confidence: "medium".to_string(),
6585                created_by: Some("test-agent".to_string()),
6586                source: Some("agent".to_string()),
6587                tags: Vec::new(),
6588                bind: crate::query::memory::RepoMemoryBindTarget {
6589                    logical_symbol_id: None,
6590                    symbol_id: None,
6591                    chunk_id: Some(chunk_id),
6592                    edge_id: None,
6593                    path: None,
6594                    start_line: None,
6595                    end_line: None,
6596                    commit_hash: None,
6597                    github_owner: None,
6598                    github_repo: None,
6599                    github_number: None,
6600                    start_logical_symbol_id: None,
6601                    end_logical_symbol_id: None,
6602                    edge_sequence_hash: None,
6603                    path_summary: None,
6604                },
6605            })
6606            .unwrap();
6607
6608        db.storage
6609            .connection()
6610            .execute("UPDATE chunks SET text_hash = 'changed' WHERE id = ?1", [chunk_id])
6611            .unwrap();
6612        let report = db.memory_validate().unwrap();
6613        assert_eq!(report.stale, 1);
6614        let stale = db.memory_for_symbol(&symbol, 10).unwrap();
6615        assert_eq!(stale[0].memory_id, created.memory.memory_id);
6616        assert_eq!(stale[0].bindings[0].anchor_status, "stale");
6617
6618        db.storage.connection().execute("DELETE FROM chunks WHERE id = ?1", [chunk_id]).unwrap();
6619        let report = db.memory_validate().unwrap();
6620        assert_eq!(report.gone, 1);
6621        let gone = db.memory_for_symbol(&symbol, 10).unwrap();
6622        assert_eq!(gone[0].bindings[0].anchor_status, "gone");
6623
6624        fs::remove_dir_all(root).unwrap();
6625    }
6626
6627    #[test]
6628    fn repo_memory_bound_to_edge_surfaces_when_impact_crosses_call_path() {
6629        let root = unique_temp_root();
6630        let _ = fs::remove_dir_all(&root);
6631        fs::create_dir_all(root.join("src")).unwrap();
6632        fs::write(
6633            root.join("src/lib.rs"),
6634            "pub fn target_edge() {}\npub fn caller_edge() {\n    target_edge();\n}\n",
6635        )
6636        .unwrap();
6637        let config = source_config(root.clone(), Language::Rust);
6638        let db = IndexDatabase::rebuild(&config).unwrap();
6639        let target = db
6640            .select_symbol(&crate::query::symbol::SymbolSelector {
6641                logical_symbol_id: None,
6642                symbol_id: None,
6643                symbol_path: None,
6644                symbol: Some("target_edge".to_string()),
6645                language: Some(Language::Rust),
6646                allow_ambiguous: false,
6647                limit: 10,
6648            })
6649            .unwrap()
6650            .unwrap()
6651            .expect("selected target");
6652        let graph_options = crate::query::graph::GraphTraversalOptions {
6653            resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
6654            symbol_id: Some(target.symbol_id),
6655            logical_symbol_id: target.logical_symbol_id,
6656            ..Default::default()
6657        };
6658        let callers =
6659            db.graph_traversal_report("find_callers", &target, true, 10, &graph_options).unwrap();
6660        let edge_id = callers.results[0].edge_id;
6661
6662        let edge_memory = db
6663            .memory_create(crate::query::memory::RepoMemoryCreate {
6664                kind: "Risk".to_string(),
6665                title: "caller_edge to target_edge must stay synchronous".to_string(),
6666                body: "This specific call path is used to prove edge-bound memories surface when impact crosses the edge."
6667                    .to_string(),
6668                confidence: "high".to_string(),
6669                created_by: Some("test-agent".to_string()),
6670                source: Some("agent".to_string()),
6671                tags: vec!["edge".to_string()],
6672                bind: crate::query::memory::RepoMemoryBindTarget {
6673                    logical_symbol_id: None,
6674                    symbol_id: None,
6675                    chunk_id: None,
6676                    edge_id: Some(edge_id),
6677                    path: None,
6678                    start_line: None,
6679                    end_line: None,
6680                    commit_hash: None,
6681                    github_owner: None,
6682                    github_repo: None,
6683                    github_number: None,
6684                    start_logical_symbol_id: None,
6685                    end_logical_symbol_id: None,
6686                    edge_sequence_hash: None,
6687                    path_summary: None,
6688                },
6689            })
6690            .unwrap();
6691        assert_eq!(edge_memory.memory.bindings[0].binding_kind, "edge");
6692        assert_eq!(edge_memory.memory.bindings[0].edge_id, Some(edge_id));
6693
6694        let impact = db
6695            .impact_surface_report_for_selected_symbol(
6696                &target,
6697                10,
6698                &crate::query::impact::ImpactSurfaceOptions {
6699                    resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
6700                    ..Default::default()
6701                },
6702            )
6703            .unwrap();
6704        assert!(impact.repo_memories.direct.is_empty());
6705        assert_eq!(impact.repo_memories.path_crossed.len(), 1);
6706        assert_eq!(impact.repo_memories.path_crossed[0].memory_id, edge_memory.memory.memory_id);
6707        assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);
6708
6709        let call_path_memory = db
6710            .memory_create(crate::query::memory::RepoMemoryCreate {
6711                kind: "TestExpectation".to_string(),
6712                title: "caller_edge path hash recall".to_string(),
6713                body: "Call-path memories are addressable by a deterministic edge sequence hash."
6714                    .to_string(),
6715                confidence: "medium".to_string(),
6716                created_by: Some("test-agent".to_string()),
6717                source: Some("agent".to_string()),
6718                tags: vec!["call-path".to_string()],
6719                bind: crate::query::memory::RepoMemoryBindTarget {
6720                    logical_symbol_id: None,
6721                    symbol_id: None,
6722                    chunk_id: None,
6723                    edge_id: None,
6724                    path: None,
6725                    start_line: None,
6726                    end_line: None,
6727                    commit_hash: None,
6728                    github_owner: None,
6729                    github_repo: None,
6730                    github_number: None,
6731                    start_logical_symbol_id: target.logical_symbol_id,
6732                    end_logical_symbol_id: target.logical_symbol_id,
6733                    edge_sequence_hash: Some("edge-sequence-test-hash".to_string()),
6734                    path_summary: Some("caller_edge -> target_edge".to_string()),
6735                },
6736            })
6737            .unwrap();
6738        let call_path = db.memory_for_call_path_hash("edge-sequence-test-hash", 10).unwrap();
6739        assert_eq!(call_path.len(), 1);
6740        assert_eq!(call_path[0].memory_id, call_path_memory.memory.memory_id);
6741        assert_eq!(call_path[0].call_paths[0].path_summary, "caller_edge -> target_edge");
6742
6743        fs::remove_dir_all(root).unwrap();
6744    }
6745
6746    #[test]
6747    fn repo_brief_ranks_churn_and_god_module_candidates() {
6748        let root = unique_temp_root();
6749        let _ = fs::remove_dir_all(&root);
6750        fs::create_dir_all(root.join("src")).unwrap();
6751        run_git(&root, &["init"]);
6752        run_git(&root, &["config", "user.name", "Rag Rat"]);
6753        run_git(&root, &["config", "user.email", "rag@example.com"]);
6754
6755        fs::write(root.join("src/stable.rs"), "pub fn stable() -> i32 { 1 }\n").unwrap();
6756        fs::write(root.join("src/hot.rs"), hot_module_text(0)).unwrap();
6757        run_git(&root, &["add", "."]);
6758        run_git(&root, &["commit", "-m", "Add initial modules"]);
6759
6760        for revision in 1..=3 {
6761            fs::write(root.join("src/hot.rs"), hot_module_text(revision)).unwrap();
6762            run_git(&root, &["add", "src/hot.rs"]);
6763            run_git(&root, &["commit", "-m", "Iterate hot module"]);
6764        }
6765
6766        let config = Config {
6767            root: root.clone(),
6768            database: root.join(".rag-rat/index.sqlite"),
6769            targets: vec![ResolvedTarget {
6770                name: "rust".to_string(),
6771                language: Language::Rust,
6772                directories: vec![PathBuf::from("src")],
6773                include: vec!["**/*.rs".to_string()],
6774                exclude: Vec::new(),
6775                kind: TargetKind::Source,
6776            }],
6777            local_ai: Default::default(),
6778        };
6779        let db = IndexDatabase::rebuild(&config).unwrap();
6780
6781        let churn = db
6782            .repo_brief(crate::query::repo_brief::RepoBriefOptions {
6783                mode: crate::query::repo_brief::RepoBriefMode::Churn,
6784                limit: 1,
6785                include_generated: false,
6786                include_memories: true,
6787            })
6788            .unwrap();
6789        assert_eq!(churn.candidates[0].path, "src/hot.rs");
6790        assert_eq!(churn.candidates[0].category, "recent_churn_hotspot");
6791        assert!(churn.candidates[0].score <= 1.0);
6792        assert!(churn.candidates[0].metrics.commit_touch_count >= 4);
6793        assert!(churn.candidates[0].why.iter().any(|reason| reason.contains("churn")));
6794
6795        let god_modules = db
6796            .repo_brief(crate::query::repo_brief::RepoBriefOptions {
6797                mode: crate::query::repo_brief::RepoBriefMode::GodModules,
6798                limit: 1,
6799                include_generated: false,
6800                include_memories: true,
6801            })
6802            .unwrap();
6803        assert_eq!(god_modules.candidates[0].path, "src/hot.rs");
6804        assert!(god_modules.candidates[0].score <= 1.0);
6805        assert!(god_modules.candidates[0].metrics.symbol_count >= 30);
6806        assert!(!god_modules.candidates[0].split_hints.is_empty());
6807        assert!(
6808            god_modules.candidates[0].next_tools.iter().any(|tool| tool.tool == "impact_surface")
6809        );
6810
6811        fs::remove_dir_all(root).unwrap();
6812    }
6813
6814    fn hot_module_text(revision: usize) -> String {
6815        let mut text = String::new();
6816        text.push_str("pub fn entry() -> i32 {\n");
6817        for i in 0..32 {
6818            text.push_str(&format!("    helper_{i}() +\n"));
6819        }
6820        text.push_str(&format!("    {revision}\n}}\n"));
6821        for i in 0..32 {
6822            text.push_str(&format!("pub fn helper_{i}() -> i32 {{ {i} }}\n"));
6823        }
6824        text
6825    }
6826
6827    fn unique_temp_root() -> PathBuf {
6828        let mut root = std::env::temp_dir();
6829        let suffix = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
6830        root.push(format!("rag-rat-schema-test-{}-{}-{suffix}", std::process::id(), now_ms()));
6831        root
6832    }
6833
6834    fn fixture_temp_root(fixture: &str) -> PathBuf {
6835        let root = unique_temp_root();
6836        let _ = fs::remove_dir_all(&root);
6837        let fixture_root =
6838            PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/fixtures").join(fixture);
6839        copy_fixture_dir(&fixture_root, &root);
6840        root
6841    }
6842
6843    fn copy_fixture_dir(from: &Path, to: &Path) {
6844        fs::create_dir_all(to).unwrap();
6845        for entry in fs::read_dir(from).unwrap() {
6846            let entry = entry.unwrap();
6847            let from_path = entry.path();
6848            let to_path = to.join(entry.file_name());
6849            if from_path.is_dir() {
6850                copy_fixture_dir(&from_path, &to_path);
6851            } else {
6852                fs::copy(&from_path, &to_path).unwrap();
6853            }
6854        }
6855    }
6856
6857    fn markdown_config(text: &str) -> (PathBuf, Config) {
6858        let root = unique_temp_root();
6859        let _ = fs::remove_dir_all(&root);
6860        let docs = root.join("docs");
6861        fs::create_dir_all(&docs).unwrap();
6862        fs::write(docs.join("search.md"), text).unwrap();
6863        let config = markdown_config_for_root(root.clone());
6864        (root, config)
6865    }
6866
6867    fn markdown_config_for_root(root: PathBuf) -> Config {
6868        Config {
6869            root: root.clone(),
6870            database: root.join(".rag-rat/index.sqlite"),
6871            targets: vec![ResolvedTarget {
6872                name: "markdown".to_string(),
6873                language: Language::Markdown,
6874                directories: vec![PathBuf::from("docs")],
6875                include: vec!["**/*.md".to_string()],
6876                exclude: Vec::new(),
6877                kind: TargetKind::Docs,
6878            }],
6879            local_ai: Default::default(),
6880        }
6881    }
6882
6883    fn source_config(root: PathBuf, language: Language) -> Config {
6884        Config {
6885            root: root.clone(),
6886            database: root.join(".rag-rat/index.sqlite"),
6887            targets: vec![ResolvedTarget {
6888                name: language.as_str().to_string(),
6889                language,
6890                directories: vec![PathBuf::from("src")],
6891                include: vec!["src/".to_string()],
6892                exclude: Vec::new(),
6893                kind: TargetKind::Source,
6894            }],
6895            local_ai: Default::default(),
6896        }
6897    }
6898
6899    fn assert_edge(db: &IndexDatabase, from: &str, to: &str, edge_kind: &str, confidence: &str) {
6900        let count = db
6901            .storage
6902            .connection()
6903            .query_row(
6904                "
6905                SELECT COUNT(*)
6906                FROM edges
6907                WHERE edge_kind = ?1
6908                  AND confidence = ?2
6909                  AND COALESCE(from_name, '') LIKE ?3
6910                  AND to_name LIKE ?4
6911                ",
6912                params![edge_kind, confidence, format!("%{from}%"), format!("%{to}%")],
6913                |row| row.get::<_, i64>(0),
6914            )
6915            .unwrap();
6916        assert!(count > 0, "missing edge {from} -[{edge_kind}/{confidence}]-> {to}");
6917    }
6918
6919    fn table_count(db: &IndexDatabase, table: &str) -> i64 {
6920        db.storage
6921            .connection()
6922            .query_row("SELECT COUNT(*) FROM sqlite_master WHERE name = ?1", [table], |row| {
6923                row.get(0)
6924            })
6925            .unwrap()
6926    }
6927
6928    fn row_count(db: &IndexDatabase, table: &str) -> i64 {
6929        db.storage
6930            .connection()
6931            .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| row.get(0))
6932            .unwrap()
6933    }
6934
6935    fn chunk_columns(db: &IndexDatabase) -> Vec<String> {
6936        table_columns(db, "chunks")
6937    }
6938
6939    fn file_columns(db: &IndexDatabase) -> Vec<String> {
6940        table_columns(db, "files")
6941    }
6942
6943    fn table_columns(db: &IndexDatabase, table: &str) -> Vec<String> {
6944        let mut stmt =
6945            db.storage.connection().prepare(&format!("PRAGMA table_info({table})")).unwrap();
6946        stmt.query_map([], |row| row.get::<_, String>(1)).unwrap().map(Result::unwrap).collect()
6947    }
6948
6949    fn indexed_revision_count(db: &IndexDatabase) -> i64 {
6950        db.storage
6951            .connection()
6952            .query_row("SELECT COUNT(*) FROM files WHERE indexed_revision != ''", [], |row| {
6953                row.get(0)
6954            })
6955            .unwrap()
6956    }
6957
6958    fn chunk_source_revision_count(db: &IndexDatabase) -> i64 {
6959        db.storage
6960            .connection()
6961            .query_row("SELECT COUNT(*) FROM chunks WHERE source_revision != ''", [], |row| {
6962                row.get(0)
6963            })
6964            .unwrap()
6965    }
6966
6967    fn first_chunk_id(db: &IndexDatabase) -> i64 {
6968        db.storage
6969            .connection()
6970            .query_row("SELECT id FROM chunks ORDER BY id LIMIT 1", [], |row| row.get(0))
6971            .unwrap()
6972    }
6973
6974    fn run_git(root: &Path, args: &[&str]) {
6975        let output = Command::new("git").args(args).current_dir(root).output().unwrap();
6976        assert!(
6977            output.status.success(),
6978            "git {:?} failed\nstdout:\n{}\nstderr:\n{}",
6979            args,
6980            String::from_utf8_lossy(&output.stdout),
6981            String::from_utf8_lossy(&output.stderr)
6982        );
6983    }
6984
6985    struct MockGitHubClient;
6986
6987    impl github::GitHubClient for MockGitHubClient {
6988        fn issue(
6989            &self,
6990            owner: &str,
6991            repo: &str,
6992            number: i64,
6993        ) -> anyhow::Result<github::GitHubIssue> {
6994            Ok(github::GitHubIssue {
6995                owner: owner.to_string(),
6996                repo: repo.to_string(),
6997                number,
6998                html_url: format!("https://github.com/{owner}/{repo}/issues/{number}"),
6999                state: "open".to_string(),
7000                title: "Decision: keep sqlite".to_string(),
7001                body: "We decided sqlite is required for binary size.".to_string(),
7002                author: Some("octo".to_string()),
7003                created_at: Some("2026-01-01T00:00:00Z".to_string()),
7004                updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7005                is_pull_request: true,
7006            })
7007        }
7008
7009        fn issue_comments(
7010            &self,
7011            owner: &str,
7012            repo: &str,
7013            number: i64,
7014        ) -> anyhow::Result<Vec<github::GitHubComment>> {
7015            Ok(vec![github::GitHubComment {
7016                id: 4201,
7017                owner: owner.to_string(),
7018                repo: repo.to_string(),
7019                number,
7020                html_url: format!("https://github.com/{owner}/{repo}/issues/{number}#comment-1"),
7021                body: "Rejected alternative: duckdb was too large.".to_string(),
7022                author: Some("octo".to_string()),
7023                created_at: Some("2026-01-01T01:00:00Z".to_string()),
7024                updated_at: Some("2026-01-01T01:00:00Z".to_string()),
7025            }])
7026        }
7027
7028        fn pull(
7029            &self,
7030            owner: &str,
7031            repo: &str,
7032            number: i64,
7033        ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7034            Ok(Some(github::GitHubPullRequest {
7035                owner: owner.to_string(),
7036                repo: repo.to_string(),
7037                number,
7038                html_url: format!("https://github.com/{owner}/{repo}/pull/{number}"),
7039                state: "open".to_string(),
7040                title: "Use sqlite".to_string(),
7041                body: "Constraint: normal queries must use cache only.".to_string(),
7042                author: Some("octo".to_string()),
7043                created_at: Some("2026-01-01T00:00:00Z".to_string()),
7044                updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7045                merged_at: None,
7046            }))
7047        }
7048
7049        fn pull_reviews(
7050            &self,
7051            owner: &str,
7052            repo: &str,
7053            number: i64,
7054        ) -> anyhow::Result<Vec<github::GitHubReview>> {
7055            Ok(vec![github::GitHubReview {
7056                id: 4202,
7057                owner: owner.to_string(),
7058                repo: repo.to_string(),
7059                number,
7060                html_url: Some(format!("https://github.com/{owner}/{repo}/pull/{number}#review")),
7061                state: "COMMENTED".to_string(),
7062                body: "Risk: live crawling during search would be surprising.".to_string(),
7063                author: Some("reviewer".to_string()),
7064                submitted_at: Some("2026-01-01T02:00:00Z".to_string()),
7065            }])
7066        }
7067
7068        fn pull_review_comments(
7069            &self,
7070            owner: &str,
7071            repo: &str,
7072            number: i64,
7073        ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7074            Ok(vec![github::GitHubReviewComment {
7075                id: 4203,
7076                owner: owner.to_string(),
7077                repo: repo.to_string(),
7078                number,
7079                path: Some("docs/search.md".to_string()),
7080                html_url: format!("https://github.com/{owner}/{repo}/pull/{number}#discussion"),
7081                body: "No longer use obsolete duckdb rationale.".to_string(),
7082                author: Some("reviewer".to_string()),
7083                created_at: Some("2026-01-01T03:00:00Z".to_string()),
7084                updated_at: Some("2026-01-01T03:00:00Z".to_string()),
7085            }])
7086        }
7087    }
7088
7089    struct PartiallyFailingGitHubClient;
7090
7091    impl github::GitHubClient for PartiallyFailingGitHubClient {
7092        fn issue(
7093            &self,
7094            owner: &str,
7095            repo: &str,
7096            number: i64,
7097        ) -> anyhow::Result<github::GitHubIssue> {
7098            if number == 404 {
7099                anyhow::bail!("gh: Not Found (HTTP 404)");
7100            }
7101            MockGitHubClient.issue(owner, repo, number)
7102        }
7103
7104        fn issue_comments(
7105            &self,
7106            owner: &str,
7107            repo: &str,
7108            number: i64,
7109        ) -> anyhow::Result<Vec<github::GitHubComment>> {
7110            MockGitHubClient.issue_comments(owner, repo, number)
7111        }
7112
7113        fn pull(
7114            &self,
7115            owner: &str,
7116            repo: &str,
7117            number: i64,
7118        ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7119            MockGitHubClient.pull(owner, repo, number)
7120        }
7121
7122        fn pull_reviews(
7123            &self,
7124            owner: &str,
7125            repo: &str,
7126            number: i64,
7127        ) -> anyhow::Result<Vec<github::GitHubReview>> {
7128            MockGitHubClient.pull_reviews(owner, repo, number)
7129        }
7130
7131        fn pull_review_comments(
7132            &self,
7133            owner: &str,
7134            repo: &str,
7135            number: i64,
7136        ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7137            MockGitHubClient.pull_review_comments(owner, repo, number)
7138        }
7139    }
7140}