1pub mod ai;
2pub mod anchors;
3pub mod chunker;
4pub mod edges;
5pub mod git_history;
6pub mod github;
7pub mod parser;
8pub mod schema;
9pub mod symbols;
10pub mod walker;
11
12#[cfg(test)]
13mod anchor_tests;
14#[cfg(test)]
15mod parser_tests;
16
17use std::{
18 collections::{BTreeMap, BTreeSet},
19 fs,
20 path::{Path, PathBuf},
21 process::Command,
22 sync::{
23 atomic::{AtomicUsize, Ordering},
24 mpsc,
25 },
26 thread,
27 thread::JoinHandle,
28 time::{SystemTime, UNIX_EPOCH},
29};
30
31use gix::{
32 bstr::{BString, ByteSlice},
33 status::{UntrackedFiles, tree_index},
34};
35use rayon::prelude::*;
36use regex::Regex;
37use rusqlite::{OptionalExtension, params};
38use serde::Serialize;
39use sha2::{Digest, Sha256};
40use thiserror::Error;
41
42use crate::{
43 config::{Config, TargetKind},
44 index::{
45 ai::{LocalAiStatus, ModelInfo, ReconcilePlan, ReconcileReport},
46 anchors::{AnchorStatus, ChunkAnchor},
47 chunker::Chunk,
48 git_history::{
49 ChunkBlameSummary, CommitSearchHit, GitHistoryIndexStatus, PathHistoryItem,
50 QueryCommitHit, SymbolHistoryItem,
51 },
52 github::{GitHubEvidence, GitHubStatus, GitHubSyncReport, Papertrail},
53 symbols::Symbol,
54 },
55 language::Language,
56 query::graph_meta::{self, GraphMetaMode},
57 search::lexical::{SearchHit, SearchOptions},
58 storage::IndexConnection,
59 storage::StorageStatus,
60};
61
/// Handle to the index database together with the git context it is scoped to.
#[derive(Debug)]
pub struct IndexDatabase {
    /// Underlying index storage connection.
    storage: IndexConnection,
    /// Commit SHA of the active context; empty until `set_context` assigns it.
    pub active_commit_sha: String,
    /// Worktree id of the active context; empty until `set_context` assigns it.
    pub active_worktree_id: String,
}
68
/// Progress events handed to the caller-supplied callback while indexing.
#[derive(Debug, Clone)]
pub enum IndexProgress {
    /// An indexing run began against `database` in the given `mode`.
    Started {
        database: PathBuf,
        mode: IndexMode,
    },
    /// File discovery is about to start.
    Discovering,
    /// Discovery finished; `files` is the number of files selected for indexing.
    Discovered {
        files: usize,
    },
    /// A file is being prepared (`current` of `total`).
    /// NOTE(review): emitted outside this section, presumably by `prepare_files_with_progress`.
    PreparingFile {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    },
    /// A prepared file is about to be written to the index (`current` of `total`).
    IndexingFile {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    },
    /// Git history preparation is being started.
    IndexingGitHistory,
    /// Logical symbols are about to be rebuilt.
    RebuildingLogicalSymbols,
    /// Graph edges are about to be resolved.
    ResolvingGraph,
    /// The FTS index is about to be synced (incremental runs).
    SyncingFts,
    /// The FTS index is about to be rebuilt (full rebuilds).
    RebuildingFts,
    /// The run committed; `files` is the count reported by the indexing step.
    Finished {
        files: usize,
    },
}
102
/// Which kind of indexing run is being performed; serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum IndexMode {
    /// Incremental run over files that git reports as changed.
    Changed,
    /// Incremental run driven by a discovery plan.
    Discover,
    /// Full rebuild of the index.
    Full,
}
110
111impl IndexMode {
112 pub fn label(self) -> &'static str {
113 match self {
114 Self::Changed => "changed files",
115 Self::Discover => "discovery",
116 Self::Full => "full rebuild",
117 }
118 }
119}
120
/// Serializable snapshot of the index state, as assembled by `IndexDatabase::status`.
#[derive(Debug, Serialize)]
pub struct IndexStatus {
    /// Display form of the database path that was queried.
    pub database: String,
    /// Whether that path exists on disk.
    pub exists: bool,
    /// Schema status of the database.
    pub schema: schema::SchemaStatus,
    /// Value of the `git_commit` meta key, if recorded.
    pub git_commit: Option<String>,
    /// `true` when the `git_dirty` meta key holds the string `"true"`.
    pub git_dirty: Option<bool>,
    /// `indexed_at_ms` meta key parsed as an integer, if present and valid.
    pub indexed_at_ms: Option<i64>,
    /// Current content revision of the index.
    pub content_revision: String,
    /// `fts_synced_at_ms` meta key parsed as an integer, if present and valid.
    pub fts_synced_at_ms: Option<i64>,
    /// Value of the `fts_source_revision` meta key, if recorded.
    pub fts_source_revision: Option<String>,
    /// Whether the FTS index is flagged dirty.
    pub fts_dirty: bool,
    /// `true` when FTS is not dirty and its source revision equals `content_revision`.
    pub fts_fresh: bool,
    /// Number of rows in `files` per language.
    pub file_count_by_language: BTreeMap<String, u64>,
    /// Count of recorded parser failures.
    pub parser_failures: u64,
    /// Recorded parser failures.
    pub parser_failure_paths: Vec<ParserFailure>,
    /// Status of the git-history index.
    pub git_history: GitHistoryIndexStatus,
    /// Status of the GitHub integration.
    pub github: GitHubStatus,
    /// Status of the local AI component.
    pub local_ai: LocalAiStatus,
}
141
/// Serializable summary of an index heal pass.
///
/// NOTE(review): the code that fills this report is outside this section; the field
/// descriptions below follow the field names only and should be confirmed there.
#[derive(Debug, Serialize)]
pub struct HealIndexReport {
    /// Files examined.
    pub checked_files: u64,
    /// Files that were healed.
    pub healed_files: u64,
    /// Files that were removed.
    pub removed_files: u64,
    /// Files that were skipped.
    pub skipped_files: u64,
    /// Whether FTS was fresh when the report was produced.
    pub fts_fresh: bool,
    /// Optional human-readable note.
    pub message: Option<String>,
}
151
/// Serializable summary of a garbage-collection pass.
///
/// NOTE(review): the producer is outside this section; field meanings follow the names.
#[derive(Debug, Clone, Serialize)]
pub struct GcReport {
    /// File rows pruned.
    pub files_pruned: u64,
    /// Chunk rows pruned.
    pub chunks_pruned: u64,
    /// File rows remaining afterwards.
    pub files_remaining: u64,
    /// Chunk rows remaining afterwards.
    pub chunks_remaining: u64,
    /// Whether the pass was skipped.
    pub skipped: bool,
}
161
/// One recorded parser failure, as listed in `IndexStatus::parser_failure_paths`.
#[derive(Debug, Serialize)]
pub struct ParserFailure {
    /// Path of the file that failed to parse.
    pub path: String,
    /// Language the file was parsed as.
    pub language: String,
    /// Failure message.
    pub message: String,
}
168
/// Serializable comparison of the files on disk against the index, built by
/// `IndexDatabase::discovery_status` from a discovery plan.
#[derive(Debug, Serialize)]
pub struct DiscoveryStatus {
    /// Number of files the discovery plan found.
    pub discovered_files: usize,
    /// Number of files the plan reports as indexed.
    pub indexed_files: usize,
    /// Number of discovered files that are not indexed.
    pub unindexed_files: usize,
    /// Subset of `unindexed_files` whose kind is `TargetKind::Source`.
    pub unindexed_source_files: usize,
    /// Number of indexed files the plan reports as changed.
    pub changed_indexed_files: usize,
    /// Number of indexed files the plan reports as deleted.
    pub removed_indexed_files: usize,
    /// Up to ten unindexed paths, as strings.
    pub unindexed_sample: Vec<String>,
    /// Set when at least one source file is unindexed.
    pub warning: Option<String>,
}
180
// Cap on files healed automatically in one call.
// NOTE(review): usage is outside this section; presumably the `cap` reported by
// `IndexError::NeedsReindex` — confirm.
const MAX_AUTO_HEAL_FILES_PER_CALL: usize = 4;
// Version tag for the graph index.
// NOTE(review): presumably compared by `ensure_graph_index_current` and written by
// `mark_graph_index_current` — confirm.
const GRAPH_INDEX_VERSION: &str = "6";
183
/// Typed index errors; each message starts with a stable tag (`Gone`, `StaleChunk`,
/// `needs_reindex`).
#[derive(Debug, Error)]
pub enum IndexError {
    /// The indexed chunk no longer exists.
    #[error("Gone: indexed chunk {chunk_id} no longer exists")]
    Gone { chunk_id: i64 },
    /// The chunk could not be found again after its file was re-indexed.
    #[error("StaleChunk: chunk {chunk_id} in {path} could not be relocated after reindex")]
    StaleChunk { chunk_id: i64, path: String },
    /// More files are stale than the automatic heal cap allows.
    #[error("needs_reindex: {stale_files} stale files exceeds automatic heal cap {cap}")]
    NeedsReindex { stale_files: usize, cap: usize },
}
193
194impl IndexDatabase {
    /// Opens the index database at `path` with the graph-index check enabled.
    ///
    /// Delegates to `open_with_graph_check(path, true)`; the returned handle has an
    /// empty commit/worktree context.
    pub fn open(path: &Path) -> anyhow::Result<Self> {
        Self::open_with_graph_check(path, true)
    }
198
    /// Returns the database path reported by the underlying storage connection.
    pub fn database_path(&self) -> &Path {
        self.storage.database_path()
    }
202
203 fn open_with_graph_check(path: &Path, check_graph: bool) -> anyhow::Result<Self> {
204 let mut storage = IndexConnection::open(path)?;
205 schema::check_compatible(storage.connection())?;
206 ai::ensure_model_manifest(storage.connection())?;
207 if let Some(root) = meta_for(storage.connection(), "source_root")? {
208 storage.set_source_root(PathBuf::from(root));
209 }
210 let db =
211 Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() };
212 if check_graph {
213 db.ensure_graph_index_current()?;
214 }
215 Ok(db)
216 }
217
    /// Opens the database named by `config` and scopes it to the git context of
    /// `config.root`.
    ///
    /// The graph check is skipped during open and run only after the source root and
    /// commit/worktree context have been applied.
    pub fn open_config(config: &Config) -> anyhow::Result<Self> {
        let mut db = Self::open_with_graph_check(&config.database, false)?;
        // The configured root overrides any `source_root` restored from meta.
        db.storage.set_source_root(config.root.clone());
        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
        db.set_context(&commit_sha, &worktree_id)?;
        db.ensure_graph_index_current()?;
        Ok(db)
    }
226
    /// Migrates the database at `path` and returns the resulting schema status.
    ///
    /// Delegates to `migrate_with_fastembed_cache` with no explicit cache directory.
    pub fn migrate(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
        Self::migrate_with_fastembed_cache(path, None)
    }
230
231 fn migrate_with_fastembed_cache(
232 path: &Path,
233 fastembed_cache_dir: Option<&Path>,
234 ) -> anyhow::Result<schema::SchemaStatus> {
235 let storage = IndexConnection::open(path)?;
236 let status = schema::status(storage.connection())?;
237 match status.state {
238 schema::SchemaState::Newer | schema::SchemaState::Dirty => {
239 anyhow::bail!("{}", status.message);
240 },
241 schema::SchemaState::Compatible => {},
242 schema::SchemaState::Missing | schema::SchemaState::Older => {
243 schema::apply(storage.connection())?;
244 },
245 }
246 ai::ensure_model_manifest(storage.connection())?;
247 if let Some(fastembed_cache_dir) = fastembed_cache_dir {
248 ai::recover_cached_fastembed_model_from(storage.connection(), fastembed_cache_dir)?;
249 } else {
250 ai::recover_cached_fastembed_model(storage.connection())?;
251 }
252 schema::status(storage.connection())
253 }
254
    /// Opens the database at `path` and reports its schema status without applying
    /// any schema changes.
    pub fn migration_check(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
        let storage = IndexConnection::open(path)?;
        schema::status(storage.connection())
    }
259
260 fn create_or_migrate(path: &Path) -> anyhow::Result<Self> {
261 let mut storage = IndexConnection::open(path)?;
262 schema::apply(storage.connection())?;
263 ai::ensure_model_manifest(storage.connection())?;
264 if let Some(root) = meta_for(storage.connection(), "source_root")? {
265 storage.set_source_root(PathBuf::from(root));
266 }
267 Ok(Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() })
268 }
269
    /// Scopes this connection to `commit_sha` / `worktree_id`.
    ///
    /// Stores both values on `self`, writes them into the temp table
    /// `connection_context`, and recreates the temp view `files` over `main.files`.
    /// The view is the union of:
    /// - rows whose `worktree_id` equals the active, non-empty worktree id and whose
    ///   `kind` is not `'deleted'`;
    /// - rows whose `commit_sha` equals the active, non-empty commit and whose `path`
    ///   has no row at all for the active worktree (so worktree rows, including
    ///   `'deleted'` ones, hide the commit row for the same path).
    pub fn set_context(&mut self, commit_sha: &str, worktree_id: &str) -> anyhow::Result<()> {
        // Note: the fields are updated before the SQL below runs.
        self.active_commit_sha = commit_sha.to_string();
        self.active_worktree_id = worktree_id.to_string();

        let conn = self.storage.connection();
        conn.execute_batch(
            "
            CREATE TEMP TABLE IF NOT EXISTS connection_context(key TEXT PRIMARY KEY, value TEXT);
            ",
        )?;

        // Upsert both context keys with one prepared statement.
        let mut stmt = conn.prepare(
            "INSERT OR REPLACE INTO temp.connection_context(key, value) VALUES (?1, ?2)",
        )?;
        stmt.execute(params!["commit_sha", commit_sha])?;
        stmt.execute(params!["worktree_id", worktree_id])?;

        // The view reads the context through subqueries, so it is dropped and recreated
        // here rather than parameterized.
        conn.execute_batch("
            DROP VIEW IF EXISTS temp.files;
            CREATE TEMP VIEW temp.files AS
            SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
            FROM main.files
            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id') AND worktree_id != '' AND kind != 'deleted'
            UNION ALL
            SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
            FROM main.files
            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
            AND commit_sha != ''
            AND path NOT IN (
            SELECT path FROM main.files
            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
            AND worktree_id != ''
            );
        ")?;

        Ok(())
    }
307
    /// Performs a full rebuild for `config` without reporting progress.
    pub fn rebuild(config: &Config) -> anyhow::Result<Self> {
        Self::rebuild_with_progress(config, |_| {})
    }
311
312 pub fn rebuild_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
313 where
314 F: FnMut(IndexProgress),
315 {
316 progress(IndexProgress::Started {
317 database: config.database.clone(),
318 mode: IndexMode::Full,
319 });
320 let mut db = Self::create_or_migrate(&config.database)?;
321 let (commit_sha, worktree_id) = resolve_git_context(&config.root);
322 db.set_context(&commit_sha, &worktree_id)?;
323 progress(IndexProgress::IndexingGitHistory);
324 let mut git_history = Some(spawn_git_history_prepare(&config.root));
325 db.storage.execute_batch(
335 "PRAGMA synchronous = OFF;
336 PRAGMA cache_size = -262144;",
337 )?;
338 let result = (|| -> anyhow::Result<()> {
339 db.storage.execute_batch("BEGIN TRANSACTION")?;
340 db.clear_full_rebuild_tables()?;
341 db.set_meta("source_root", &config.root.display().to_string())?;
342 db.storage.set_source_root(config.root.clone());
343 db.write_git_meta(&config.root)?;
344 let indexed = db.index_targets_with_progress(config, &mut progress)?;
345 db.apply_prepared_git_history(
346 &config.root,
347 git_history
348 .take()
349 .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
350 )?;
351 progress(IndexProgress::RebuildingLogicalSymbols);
352 db.rebuild_logical_symbols()?;
353 progress(IndexProgress::ResolvingGraph);
354 db.resolve_edges()?;
355 db.mark_graph_index_current()?;
356 progress(IndexProgress::RebuildingFts);
357 db.rebuild_fts()?;
358 db.set_meta("indexed_at_ms", &now_ms().to_string())?;
359 db.storage.execute_batch("COMMIT")?;
360 progress(IndexProgress::Finished { files: indexed });
361 Ok(())
362 })();
363 if result.is_err() {
364 if let Some(handle) = git_history.take() {
365 let _ = join_git_history_prepare(handle);
366 }
367 let _ = db.storage.execute_batch("ROLLBACK");
368 }
369 let _ = db.storage.execute_batch("PRAGMA synchronous = NORMAL;");
372 result?;
373 Ok(db)
374 }
375
    /// Deletes every file row visible in the active context, plus dependent rows.
    ///
    /// Stages the ids of `main.files` rows for the active worktree, and of rows for the
    /// active commit whose path has no worktree row, into `temp.staged_file_ids`; then
    /// runs `delete_staged_files_cascade` and empties the staging table. Relies on
    /// `temp.connection_context`, which `set_context` creates.
    fn clear_full_rebuild_tables(&self) -> anyhow::Result<()> {
        self.storage.execute_batch(
            "
            CREATE TEMP TABLE IF NOT EXISTS staged_file_ids(id INTEGER PRIMARY KEY);
            DELETE FROM temp.staged_file_ids;
            INSERT OR IGNORE INTO temp.staged_file_ids(id)
            SELECT id
            FROM main.files
            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
            AND worktree_id != '';
            INSERT OR IGNORE INTO temp.staged_file_ids(id)
            SELECT id
            FROM main.files
            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
            AND commit_sha != ''
            AND path NOT IN (
            SELECT path FROM main.files
            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
            AND worktree_id != ''
            );
            ",
        )?;
        self.delete_staged_files_cascade()?;
        self.storage.execute_batch("DELETE FROM temp.staged_file_ids;")?;
        Ok(())
    }
403
    /// Deletes the files listed in `temp.staged_file_ids` together with their dependents.
    ///
    /// Order of the batch:
    /// 1. edges pointing at symbols of staged files are reset to `'unresolved'`
    ///    (target symbol and line range cleared) rather than deleted;
    /// 2. edges originating from staged files or their symbols are deleted;
    /// 3. logical-symbol memberships of those symbols are deleted, then any logical
    ///    symbol left without members;
    /// 4. symbol facts, FTS rows, chunk summaries, chunk embeddings, chunk blame, docs,
    ///    and parser failures tied to the staged files are deleted;
    /// 5. finally the symbols, chunks, and file rows themselves.
    ///
    /// Dependents are removed first because their subqueries join through
    /// `main.symbols`, `main.chunks`, and `main.files`, which must still hold the
    /// staged rows at that point.
    fn delete_staged_files_cascade(&self) -> anyhow::Result<()> {
        self.storage.execute_batch(
            "
            UPDATE main.edges
            SET to_symbol_id = NULL,
            target_start_line = NULL,
            target_end_line = NULL,
            resolution = 'unresolved'
            WHERE to_symbol_id IN (
            SELECT symbols.id
            FROM main.symbols
            JOIN temp.staged_file_ids ON staged_file_ids.id = symbols.file_id
            );
            DELETE FROM main.edges
            WHERE source_file_id IN (SELECT id FROM temp.staged_file_ids)
            OR from_symbol_id IN (
            SELECT symbols.id
            FROM main.symbols
            JOIN temp.staged_file_ids ON staged_file_ids.id = symbols.file_id
            );

            DELETE FROM main.logical_symbol_members
            WHERE symbol_id IN (
            SELECT symbols.id
            FROM main.symbols
            JOIN temp.staged_file_ids ON staged_file_ids.id = symbols.file_id
            );
            DELETE FROM main.logical_symbols
            WHERE id NOT IN (
            SELECT logical_symbol_id FROM main.logical_symbol_members
            );
            DELETE FROM main.symbol_facts
            WHERE symbol_id IN (
            SELECT symbols.id
            FROM main.symbols
            JOIN temp.staged_file_ids ON staged_file_ids.id = symbols.file_id
            );
            DELETE FROM main.chunk_fts
            WHERE rowid IN (
            SELECT chunks.id
            FROM main.chunks
            JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
            );
            DELETE FROM main.chunk_summaries
            WHERE chunk_id IN (
            SELECT chunks.id
            FROM main.chunks
            JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
            );
            DELETE FROM main.chunk_embeddings
            WHERE chunk_id IN (
            SELECT chunks.id
            FROM main.chunks
            JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
            );
            DELETE FROM main.git_chunk_blame
            WHERE chunk_id IN (
            SELECT chunks.id
            FROM main.chunks
            JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
            );
            DELETE FROM main.docs
            WHERE chunk_id IN (
            SELECT chunks.id
            FROM main.chunks
            JOIN temp.staged_file_ids ON staged_file_ids.id = chunks.file_id
            );
            DELETE FROM main.parser_failures
            WHERE path IN (
            SELECT path
            FROM main.files
            JOIN temp.staged_file_ids ON staged_file_ids.id = files.id
            );
            DELETE FROM main.symbols
            WHERE file_id IN (SELECT id FROM temp.staged_file_ids);
            DELETE FROM main.chunks
            WHERE file_id IN (SELECT id FROM temp.staged_file_ids);
            DELETE FROM main.files
            WHERE id IN (SELECT id FROM temp.staged_file_ids);
            ",
        )?;
        Ok(())
    }
491
    /// Runs a changed-files incremental index for `config` without reporting progress.
    pub fn index_changed(config: &Config) -> anyhow::Result<Self> {
        Self::index_changed_with_progress(config, |_| {})
    }
495
    /// Runs an incremental index in `IndexMode::Changed`, reporting phases to `progress`.
    pub fn index_changed_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
    where
        F: FnMut(IndexProgress),
    {
        Self::index_incremental_with_progress(config, IndexMode::Changed, &mut progress)
    }
502
    /// Runs a discovery-driven incremental index for `config` without reporting progress.
    pub fn index_discover(config: &Config) -> anyhow::Result<Self> {
        Self::index_discover_with_progress(config, |_| {})
    }
506
    /// Runs an incremental index in `IndexMode::Discover`, reporting phases to `progress`.
    pub fn index_discover_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
    where
        F: FnMut(IndexProgress),
    {
        Self::index_incremental_with_progress(config, IndexMode::Discover, &mut progress)
    }
513
    /// Shared driver for the `Changed` and `Discover` incremental modes.
    ///
    /// Falls back to a full rebuild when the database file does not exist, its schema
    /// is missing, or the active context has no indexed files. Otherwise it runs the
    /// mode-specific file pass and applies the prepared git history inside a single
    /// transaction. Logical symbols, graph edges, and FTS are refreshed only when the
    /// file pass reported at least one file.
    ///
    /// `mode` must not be `IndexMode::Full`; that arm is `unreachable!`.
    ///
    /// On failure the git-history worker is joined (if not yet consumed) and a
    /// `ROLLBACK` is attempted before the error is returned.
    fn index_incremental_with_progress<F>(
        config: &Config,
        mode: IndexMode,
        progress: &mut F,
    ) -> anyhow::Result<Self>
    where
        F: FnMut(IndexProgress),
    {
        if !config.database.exists() {
            return Self::rebuild_with_progress(config, progress);
        }
        if Self::migration_check(&config.database)?.state == schema::SchemaState::Missing {
            return Self::rebuild_with_progress(config, progress);
        }

        // NOTE(review): `open` runs the graph check before the context is set, whereas
        // `open_config` sets the context first — confirm this ordering is intended.
        let mut db = Self::open(&config.database)?;
        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
        db.set_context(&commit_sha, &worktree_id)?;
        if db.indexed_file_count()? == 0 {
            return Self::rebuild_with_progress(config, progress);
        }
        progress(IndexProgress::Started { database: config.database.clone(), mode });
        progress(IndexProgress::IndexingGitHistory);
        // Held in an Option so the error path can tell whether it was already consumed.
        let mut git_history = Some(spawn_git_history_prepare(&config.root));
        let result = (|| -> anyhow::Result<()> {
            db.storage.execute_batch("BEGIN TRANSACTION")?;
            db.set_meta("source_root", &config.root.display().to_string())?;
            db.storage.set_source_root(config.root.clone());
            db.write_git_meta(&config.root)?;
            let indexed = match mode {
                IndexMode::Changed => db.index_changed_files_with_progress(config, progress)?,
                IndexMode::Discover => db.index_discovered_files_with_progress(config, progress)?,
                IndexMode::Full => unreachable!("full mode is handled by rebuild_with_progress"),
            };
            db.apply_prepared_git_history(
                &config.root,
                git_history
                    .take()
                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
            )?;
            // Derived data is refreshed only when the file pass touched something.
            if indexed > 0 {
                progress(IndexProgress::RebuildingLogicalSymbols);
                db.rebuild_logical_symbols()?;
                progress(IndexProgress::ResolvingGraph);
                db.resolve_edges()?;
                db.mark_graph_index_current()?;
                progress(IndexProgress::SyncingFts);
                db.sync_fts()?;
            }
            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
            db.storage.execute_batch("COMMIT")?;
            progress(IndexProgress::Finished { files: indexed });
            Ok(())
        })();
        if result.is_err() {
            if let Some(handle) = git_history.take() {
                let _ = join_git_history_prepare(handle);
            }
            // Best effort: there may be no open transaction if BEGIN itself failed.
            let _ = db.storage.execute_batch("ROLLBACK");
        }
        result?;
        Ok(db)
    }
577
    /// Indexes every target file of `config` without reporting progress; the file
    /// count from the underlying pass is discarded.
    pub fn index_targets(&self, config: &Config) -> anyhow::Result<()> {
        self.index_targets_with_progress(config, &mut |_| {})?;
        Ok(())
    }
582
583 fn index_targets_with_progress<F>(
584 &self,
585 config: &Config,
586 progress: &mut F,
587 ) -> anyhow::Result<usize>
588 where
589 F: FnMut(IndexProgress),
590 {
591 progress(IndexProgress::Discovering);
592 let files = collect_index_files(config)?;
593 let changes = git_changed_paths(&config.root).unwrap_or_default();
594 let files = self.assign_file_scopes(files, &changes);
595 progress(IndexProgress::Discovered { files: files.len() });
596
597 let prepared = prepare_files_with_progress(&files, progress)?;
598 for (index, prepared_file) in prepared.iter().enumerate() {
599 let current = index + 1;
600 if should_report_file_progress(current, files.len()) {
601 progress(IndexProgress::IndexingFile {
602 current,
603 total: files.len(),
604 path: prepared_file.file.relative_path.clone(),
605 language: prepared_file.file.language,
606 kind: prepared_file.file.kind,
607 });
608 }
609 self.insert_prepared_file(prepared_file)?;
610 }
611
612 Ok(files.len())
613 }
614
615 fn index_changed_files_with_progress<F>(
616 &self,
617 config: &Config,
618 progress: &mut F,
619 ) -> anyhow::Result<usize>
620 where
621 F: FnMut(IndexProgress),
622 {
623 progress(IndexProgress::Discovering);
624 let changes = git_changed_paths(&config.root)?;
625 let files = collect_changed_index_files(config, &changes)?;
626 let files = self.assign_file_scopes(files, &changes);
627 self.apply_incremental_file_plan(files, changes.deleted, progress)
628 }
629
630 fn index_discovered_files_with_progress<F>(
631 &self,
632 config: &Config,
633 progress: &mut F,
634 ) -> anyhow::Result<usize>
635 where
636 F: FnMut(IndexProgress),
637 {
638 progress(IndexProgress::Discovering);
639 let plan = discovery_plan(self.storage.connection(), config)?;
640 let changes = git_changed_paths(&config.root).unwrap_or_default();
641 let files = self.assign_file_scopes(plan.files, &changes);
642 self.apply_incremental_file_plan(files, plan.deleted, progress)
643 }
644
645 fn assign_file_scopes(
646 &self,
647 files: Vec<IndexFile>,
648 changes: &GitChangedPaths,
649 ) -> Vec<IndexFile> {
650 let has_base_commit = !self.active_commit_sha.is_empty();
651 files
652 .into_iter()
653 .map(|mut file| {
654 if !has_base_commit || changes.changed.contains(&file.relative_path) {
655 file.commit_sha.clear();
656 file.worktree_id.clone_from(&self.active_worktree_id);
657 } else {
658 file.commit_sha.clone_from(&self.active_commit_sha);
659 file.worktree_id.clear();
660 }
661 file
662 })
663 .collect()
664 }
665
666 fn apply_incremental_file_plan<F>(
667 &self,
668 files: Vec<IndexFile>,
669 deleted: BTreeSet<PathBuf>,
670 progress: &mut F,
671 ) -> anyhow::Result<usize>
672 where
673 F: FnMut(IndexProgress),
674 {
675 progress(IndexProgress::Discovered { files: files.len() });
676
677 let deleted_count = deleted.len();
678 for path in deleted {
679 self.mark_file_deleted(&path)?;
680 }
681
682 let prepared = prepare_files_with_progress(&files, progress)?;
683 for (index, prepared_file) in prepared.iter().enumerate() {
684 let current = index + 1;
685 if should_report_file_progress(current, files.len()) {
686 progress(IndexProgress::IndexingFile {
687 current,
688 total: files.len(),
689 path: prepared_file.file.relative_path.clone(),
690 language: prepared_file.file.language,
691 kind: prepared_file.file.kind,
692 });
693 }
694 self.remove_file_in_scope(
695 &prepared_file.file.relative_path,
696 &prepared_file.file.commit_sha,
697 &prepared_file.file.worktree_id,
698 )?;
699 self.insert_prepared_file(prepared_file)?;
700 }
701
702 Ok(files.len() + deleted_count)
703 }
704
705 pub fn status(&self, database: &Path) -> anyhow::Result<IndexStatus> {
706 let mut counts = BTreeMap::new();
707 let mut stmt = self
708 .storage
709 .connection()
710 .prepare("SELECT language, COUNT(*) FROM files GROUP BY language ORDER BY language")?;
711 let rows =
712 stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)))?;
713 for row in rows {
714 let (language, count) = row?;
715 counts.insert(language, u64::try_from(count).unwrap_or(0));
716 }
717
718 let content_revision = self.content_revision()?;
719 let fts_source_revision = self.meta("fts_source_revision")?;
720 let fts_dirty = self.fts_dirty()?;
721
722 Ok(IndexStatus {
723 database: database.display().to_string(),
724 exists: database.exists(),
725 schema: schema::status(self.storage.connection())?,
726 git_commit: self.meta("git_commit")?,
727 git_dirty: self.meta("git_dirty")?.map(|value| value == "true"),
728 indexed_at_ms: self.meta("indexed_at_ms")?.and_then(|value| value.parse::<i64>().ok()),
729 content_revision: content_revision.clone(),
730 fts_synced_at_ms: self
731 .meta("fts_synced_at_ms")?
732 .and_then(|value| value.parse::<i64>().ok()),
733 fts_dirty,
734 fts_fresh: !fts_dirty
735 && fts_source_revision.as_deref() == Some(content_revision.as_str()),
736 fts_source_revision,
737 file_count_by_language: counts,
738 parser_failures: self.parser_failure_count()?,
739 parser_failure_paths: self.parser_failure_paths()?,
740 git_history: self.git_history_status()?,
741 github: self.github_status()?,
742 local_ai: self.local_ai_status()?,
743 })
744 }
745
    /// Returns the status reported by the underlying storage connection.
    pub fn storage_status(&self) -> anyhow::Result<StorageStatus> {
        self.storage.status()
    }
749
750 pub fn discovery_status(&self, config: &Config) -> anyhow::Result<DiscoveryStatus> {
751 let plan = discovery_plan(self.storage.connection(), config)?;
752 let unindexed_source_files =
753 plan.unindexed.iter().filter(|file| file.kind == TargetKind::Source).count();
754 let unindexed_sample =
755 plan.unindexed.iter().take(10).map(|file| path_string(&file.relative_path)).collect();
756 let warning = (unindexed_source_files > 0).then(|| {
757 format!(
758 "{unindexed_source_files} unindexed source files detected. Run `rag-rat index --full` or `rag-rat index --discover`."
759 )
760 });
761 Ok(DiscoveryStatus {
762 discovered_files: plan.discovered_files,
763 indexed_files: plan.indexed_files,
764 unindexed_files: plan.unindexed.len(),
765 unindexed_source_files,
766 changed_indexed_files: plan.changed.len(),
767 removed_indexed_files: plan.deleted.len(),
768 unindexed_sample,
769 warning,
770 })
771 }
772
    /// Searches the index for `query`, returning at most `limit` hits.
    ///
    /// Uses compact graph metadata with a graph limit of 3 and default search options.
    pub fn search(
        &self,
        query: &str,
        limit: u32,
        include_generated: bool,
    ) -> anyhow::Result<Vec<SearchHit>> {
        self.search_with_graph_meta(query, limit, include_generated, GraphMetaMode::Compact, 3)
    }
781
    /// Explain variant of `search`: same defaults (compact graph metadata, graph limit
    /// of 3), routed through `search_explain_with_graph_meta`.
    pub fn search_explain(
        &self,
        query: &str,
        limit: u32,
        include_generated: bool,
    ) -> anyhow::Result<Vec<SearchHit>> {
        self.search_explain_with_graph_meta(
            query,
            limit,
            include_generated,
            GraphMetaMode::Compact,
            3,
        )
    }
796
    /// Searches with caller-chosen graph metadata settings and default `SearchOptions`.
    pub fn search_with_graph_meta(
        &self,
        query: &str,
        limit: u32,
        include_generated: bool,
        graph_mode: GraphMetaMode,
        graph_limit: u32,
    ) -> anyhow::Result<Vec<SearchHit>> {
        self.search_with_graph_meta_options(
            query,
            limit,
            include_generated,
            graph_mode,
            graph_limit,
            SearchOptions::default(),
        )
    }
814
815 pub fn search_with_graph_meta_options(
816 &self,
817 query: &str,
818 limit: u32,
819 include_generated: bool,
820 graph_mode: GraphMetaMode,
821 graph_limit: u32,
822 options: SearchOptions,
823 ) -> anyhow::Result<Vec<SearchHit>> {
824 self.ensure_fts_fresh()?;
825 let mut hits =
826 self.search_with_heal(query, limit, include_generated, true, false, options)?;
827 graph_meta::attach_to_search_hits(
828 self.storage.connection(),
829 &mut hits,
830 graph_mode,
831 graph_limit,
832 )?;
833 Ok(hits)
834 }
835
    /// Explain search with caller-chosen graph metadata settings and default
    /// `SearchOptions`.
    pub fn search_explain_with_graph_meta(
        &self,
        query: &str,
        limit: u32,
        include_generated: bool,
        graph_mode: GraphMetaMode,
        graph_limit: u32,
    ) -> anyhow::Result<Vec<SearchHit>> {
        self.search_explain_with_graph_meta_options(
            query,
            limit,
            include_generated,
            graph_mode,
            graph_limit,
            SearchOptions::default(),
        )
    }
853
854 pub fn search_explain_with_graph_meta_options(
855 &self,
856 query: &str,
857 limit: u32,
858 include_generated: bool,
859 graph_mode: GraphMetaMode,
860 graph_limit: u32,
861 options: SearchOptions,
862 ) -> anyhow::Result<Vec<SearchHit>> {
863 self.ensure_fts_fresh()?;
864 let mut hits =
865 self.search_with_heal(query, limit, include_generated, true, true, options)?;
866 graph_meta::attach_to_search_hits(
867 self.storage.connection(),
868 &mut hits,
869 graph_mode,
870 graph_limit,
871 )?;
872 Ok(hits)
873 }
874
    /// Looks up symbols by `name`, optionally restricted to `language`, passing `limit`
    /// through to `query::symbol::lookup`.
    pub fn symbols(
        &self,
        name: &str,
        language: Option<Language>,
        limit: u32,
    ) -> anyhow::Result<Vec<crate::query::symbol::SymbolHit>> {
        crate::query::symbol::lookup(self.storage.connection(), name, language, limit)
    }
883
    /// Returns the candidate lookup for `selector` via
    /// `query::symbol::lookup_candidates`.
    pub fn symbol_candidates(
        &self,
        selector: &crate::query::symbol::SymbolSelector,
    ) -> anyhow::Result<crate::query::symbol::SymbolLookup> {
        crate::query::symbol::lookup_candidates(self.storage.connection(), selector)
    }
890
    /// Resolves `selector` through `query::symbol::select_one`.
    ///
    /// The outer `Result` carries query errors. The inner `Result` is `Ok` with an
    /// optional hit, or `Err` with a `SymbolDisambiguation`.
    pub fn select_symbol(
        &self,
        selector: &crate::query::symbol::SymbolSelector,
    ) -> anyhow::Result<
        Result<Option<crate::query::symbol::SymbolHit>, crate::query::symbol::SymbolDisambiguation>,
    > {
        crate::query::symbol::select_one(self.storage.connection(), selector)
    }
899
    /// Reads chunk `chunk_id` with full graph metadata (graph limit 20) and memories.
    pub fn read_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
        self.read_chunk_with_graph_and_memories(chunk_id, GraphMetaMode::Full, 20, true)
    }
903
    /// Reads chunk `chunk_id` with the given graph metadata settings and without
    /// memories.
    pub fn read_chunk_with_graph(
        &self,
        chunk_id: i64,
        graph_mode: GraphMetaMode,
        graph_limit: u32,
    ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
        self.read_chunk_with_graph_and_memories(chunk_id, graph_mode, graph_limit, false)
    }
912
913 pub fn read_chunk_with_graph_and_memories(
914 &self,
915 chunk_id: i64,
916 graph_mode: GraphMetaMode,
917 graph_limit: u32,
918 include_memories: bool,
919 ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
920 let Some(mut chunk) = self.read_chunk_current(chunk_id)? else {
921 return Ok(None);
922 };
923 graph_meta::attach_to_read_chunk(
924 self.storage.connection(),
925 &mut chunk,
926 graph_mode,
927 graph_limit,
928 )?;
929 if include_memories {
930 chunk.memories =
931 crate::query::memory::memories_for_chunk(self.storage.connection(), chunk_id, 20)?;
932 }
933 Ok(Some(chunk))
934 }
935
936 fn read_chunk_current(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
937 let Some(mut chunk) = crate::query::read_chunk(self.storage.connection(), chunk_id)? else {
938 return Ok(None);
939 };
940 let Some(root) = self.storage.source_root() else {
941 return Ok(Some(chunk));
942 };
943 let source_path = root.join(&chunk.path);
944 let current_text = match fs::read_to_string(&source_path) {
945 Ok(text) => text,
946 Err(_) => {
947 let path = chunk.path.clone();
948 self.mark_file_deleted(Path::new(&path))?;
949 self.sync_fts()?;
950 anyhow::bail!(IndexError::Gone { chunk_id });
951 },
952 };
953 let anchor = self.chunk_anchor(chunk_id)?;
954 let status = anchors::validate(
955 &chunk.text,
956 usize::try_from(chunk.start_line).unwrap_or(1),
957 usize::try_from(chunk.end_line).unwrap_or(1),
958 &anchor,
959 ¤t_text,
960 );
961 match status {
962 AnchorStatus::Exact => {
963 if let Some(text) = anchors::slice_lines(
964 ¤t_text,
965 usize::try_from(chunk.start_line).unwrap_or(1),
966 usize::try_from(chunk.end_line).unwrap_or(1),
967 ) {
968 chunk.text = text;
969 }
970 Ok(Some(chunk))
971 },
972 AnchorStatus::Relocated { start_line, end_line, text } => {
973 chunk.start_line = i64::try_from(start_line)?;
974 chunk.end_line = i64::try_from(end_line)?;
975 chunk.text = text;
976 Ok(Some(chunk))
977 },
978 AnchorStatus::Stale => {
979 self.heal_file(Path::new(&chunk.path))?;
980 self.sync_fts()?;
981 let healed = crate::query::read_chunk(self.storage.connection(), chunk_id)?;
982 match healed {
983 Some(chunk) => Ok(Some(chunk)),
984 None => anyhow::bail!(IndexError::StaleChunk { chunk_id, path: chunk.path }),
985 }
986 },
987 }
988 }
989
    /// Ensures FTS is fresh, then runs `search::lexical::search_hash_baseline` for
    /// `query`.
    pub fn search_hash_baseline(
        &self,
        query: &str,
        limit: u32,
        include_generated: bool,
    ) -> anyhow::Result<Vec<SearchHit>> {
        self.ensure_fts_fresh()?;
        crate::search::lexical::search_hash_baseline(
            self.storage.connection(),
            query,
            limit,
            include_generated,
        )
    }
1004
    /// Searches for `symbol` as a plain query, with generated files included.
    pub fn docs_for_symbol(&self, symbol: &str, limit: u32) -> anyhow::Result<Vec<SearchHit>> {
        self.search(symbol, limit, true)
    }
1008
1009 pub fn docs_for_selected_symbol(
1010 &self,
1011 symbol: &crate::query::symbol::SymbolHit,
1012 limit: u32,
1013 ) -> anyhow::Result<Vec<SearchHit>> {
1014 let mut hits = self.local_symbol_context_hits(symbol, limit)?;
1015 hits.extend(self.search(&symbol.name, limit.saturating_mul(4).max(limit), true)?);
1016 rank_docs_for_symbol(symbol, &mut hits);
1017 dedupe_search_hits(&mut hits);
1018 hits.truncate(usize::try_from(limit).unwrap_or(usize::MAX));
1019 Ok(hits)
1020 }
1021
    /// Searches indexed commits for `query` via `git_history::commit_search`.
    pub fn commit_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<CommitSearchHit>> {
        git_history::commit_search(self.storage.connection(), query, limit)
    }
1025
    /// Returns indexed history items for `path` via `git_history::history_for_path`.
    pub fn git_history_for_path(
        &self,
        path: &str,
        limit: u32,
    ) -> anyhow::Result<Vec<PathHistoryItem>> {
        git_history::history_for_path(self.storage.connection(), path, limit)
    }
1033
1034 pub fn git_history_for_symbol(
1035 &self,
1036 symbol: &str,
1037 language: Option<Language>,
1038 limit: u32,
1039 ) -> anyhow::Result<Vec<SymbolHistoryItem>> {
1040 let symbols = self.symbols(symbol, language, limit)?;
1041 let per_symbol_limit = limit.max(1);
1042 let mut out = Vec::new();
1043 for symbol_hit in symbols {
1044 for commit in self.git_history_for_path(&symbol_hit.path, per_symbol_limit)? {
1045 out.push(SymbolHistoryItem {
1046 symbol: symbol_hit.name.clone(),
1047 qualified_name: symbol_hit.qualified_name.clone(),
1048 path: symbol_hit.path.clone(),
1049 start_byte: symbol_hit.start_byte,
1050 end_byte: symbol_hit.end_byte,
1051 commit,
1052 evidence_kind: "historical",
1053 });
1054 if out.len() >= usize::try_from(limit).unwrap_or(usize::MAX) {
1055 return Ok(out);
1056 }
1057 }
1058 }
1059 Ok(out)
1060 }
1061
1062 pub fn commits_touching_query(
1063 &self,
1064 query: &str,
1065 limit: u32,
1066 ) -> anyhow::Result<Vec<QueryCommitHit>> {
1067 let current_hits = self.search(query, limit, true)?;
1068 git_history::commits_touching_query(self.storage.connection(), query, limit, ¤t_hits)
1069 }
1070
1071 pub fn git_blame_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<ChunkBlameSummary>> {
1072 let Some(chunk) = self.read_chunk(chunk_id)? else {
1073 return Ok(None);
1074 };
1075 let source_text_hash = git_history::source_text_hash(&chunk.text);
1076 if let Some(cached) =
1077 git_history::cached_blame(self.storage.connection(), chunk_id, &source_text_hash)?
1078 {
1079 return Ok(Some(cached));
1080 }
1081 let Some(root) = self.storage.source_root() else {
1082 return Ok(Some(ChunkBlameSummary {
1083 chunk_id,
1084 path: chunk.path,
1085 start_line: chunk.start_line,
1086 end_line: chunk.end_line,
1087 source_text_hash,
1088 line_count: 0,
1089 dominant_commit: None,
1090 dominant_commit_lines: 0,
1091 newest_commit: None,
1092 newest_commit_time_s: None,
1093 oldest_commit: None,
1094 oldest_commit_time_s: None,
1095 commit_counts: BTreeMap::new(),
1096 evidence_kind: "historical",
1097 }));
1098 };
1099 let blame_lines =
1100 git_history::blame_lines(root, &chunk.path, chunk.start_line, chunk.end_line);
1101 let mut counts = BTreeMap::<String, i64>::new();
1102 let mut newest = None::<(String, i64)>;
1103 let mut oldest = None::<(String, i64)>;
1104 for line in &blame_lines {
1105 *counts.entry(line.commit.clone()).or_default() += 1;
1106 if let Some(time) = line.author_time_s {
1107 if newest.as_ref().is_none_or(|(_, newest_time)| time > *newest_time) {
1108 newest = Some((line.commit.clone(), time));
1109 }
1110 if oldest.as_ref().is_none_or(|(_, oldest_time)| time < *oldest_time) {
1111 oldest = Some((line.commit.clone(), time));
1112 }
1113 }
1114 }
1115 let dominant = counts
1116 .iter()
1117 .max_by_key(|(commit, count)| (*count, *commit))
1118 .map(|(commit, count)| (commit.clone(), *count));
1119 let summary = ChunkBlameSummary {
1120 chunk_id,
1121 path: chunk.path,
1122 start_line: chunk.start_line,
1123 end_line: chunk.end_line,
1124 source_text_hash,
1125 line_count: i64::try_from(blame_lines.len()).unwrap_or(i64::MAX),
1126 dominant_commit: dominant.as_ref().map(|(commit, _)| commit.clone()),
1127 dominant_commit_lines: dominant.map(|(_, count)| count).unwrap_or(0),
1128 newest_commit: newest.as_ref().map(|(commit, _)| commit.clone()),
1129 newest_commit_time_s: newest.as_ref().map(|(_, time)| *time),
1130 oldest_commit: oldest.as_ref().map(|(commit, _)| commit.clone()),
1131 oldest_commit_time_s: oldest.as_ref().map(|(_, time)| *time),
1132 commit_counts: counts,
1133 evidence_kind: "historical",
1134 };
1135 git_history::store_blame(self.storage.connection(), &summary)?;
1136 Ok(Some(summary))
1137 }
1138
1139 pub fn github_sync_from_refs(&self, offline: bool) -> anyhow::Result<GitHubSyncReport> {
1140 self.github_sync_from_refs_with_progress(offline, |_| {})
1141 }
1142
1143 pub fn github_sync_from_refs_with_progress(
1144 &self,
1145 offline: bool,
1146 progress: impl FnMut(github::GitHubSyncProgress),
1147 ) -> anyhow::Result<GitHubSyncReport> {
1148 let Some(root) = self.storage.source_root() else {
1149 anyhow::bail!("index has no source_root metadata; rebuild required");
1150 };
1151 if offline {
1152 github::sync_from_refs::<github::GhCliGitHubClient>(
1153 self.storage.connection(),
1154 root,
1155 None,
1156 true,
1157 )
1158 } else {
1159 let client = github::GhCliGitHubClient;
1160 github::sync_from_refs_with_progress(
1161 self.storage.connection(),
1162 root,
1163 Some(&client),
1164 false,
1165 progress,
1166 )
1167 }
1168 }
1169
1170 pub fn github_sync_issue(
1171 &self,
1172 issue_ref: &str,
1173 offline: bool,
1174 ) -> anyhow::Result<GitHubSyncReport> {
1175 if offline {
1176 github::sync_issue::<github::GhCliGitHubClient>(
1177 self.storage.connection(),
1178 issue_ref,
1179 None,
1180 true,
1181 )
1182 } else {
1183 let client = github::GhCliGitHubClient;
1184 github::sync_issue(self.storage.connection(), issue_ref, Some(&client), false)
1185 }
1186 }
1187
1188 pub fn github_issue_search(
1189 &self,
1190 query: &str,
1191 limit: u32,
1192 ) -> anyhow::Result<Vec<GitHubEvidence>> {
1193 github::issue_search(self.storage.connection(), query, limit)
1194 }
1195
1196 pub fn rationale_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<GitHubEvidence>> {
1197 github::rationale_search(self.storage.connection(), query, limit)
1198 }
1199
1200 pub fn github_refs_for_path(
1201 &self,
1202 path: &str,
1203 limit: u32,
1204 ) -> anyhow::Result<Vec<github::GitHubRef>> {
1205 github::refs_for_path(self.storage.connection(), path, limit)
1206 }
1207
1208 pub fn github_sync_status(&self) -> anyhow::Result<GitHubStatus> {
1209 self.github_status()
1210 }
1211
1212 pub fn papertrail_for_chunk(
1213 &self,
1214 chunk_id: i64,
1215 limit: u32,
1216 ) -> anyhow::Result<Option<Papertrail>> {
1217 let Some(chunk) = self.read_chunk(chunk_id)? else {
1218 return Ok(None);
1219 };
1220 Ok(Some(github::papertrail_for_chunk(self.storage.connection(), &chunk, limit)?))
1221 }
1222
1223 pub fn papertrail_for_symbol(
1224 &self,
1225 symbol: &str,
1226 language: Option<Language>,
1227 limit: u32,
1228 ) -> anyhow::Result<Option<Papertrail>> {
1229 let Some(symbol) = self.symbols(symbol, language, limit)?.into_iter().next() else {
1230 return Ok(None);
1231 };
1232 Ok(Some(github::papertrail_for_symbol(self.storage.connection(), &symbol, limit)?))
1233 }
1234
1235 pub fn papertrail_for_selected_symbol(
1236 &self,
1237 symbol: &crate::query::symbol::SymbolHit,
1238 limit: u32,
1239 ) -> anyhow::Result<Papertrail> {
1240 github::papertrail_for_symbol(self.storage.connection(), symbol, limit)
1241 }
1242
1243 pub fn papertrail_for_commit(
1244 &self,
1245 commit_hash: &str,
1246 limit: u32,
1247 ) -> anyhow::Result<Papertrail> {
1248 github::papertrail_for_commit(self.storage.connection(), commit_hash, limit)
1249 }
1250
1251 pub fn local_ai_status(&self) -> anyhow::Result<LocalAiStatus> {
1252 ai::status(self.storage.connection())
1253 }
1254
1255 pub fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
1256 ai::models(self.storage.connection())
1257 }
1258
1259 pub fn install_model(&self, model_id: &str) -> anyhow::Result<ModelInfo> {
1260 ai::install_model(self.storage.connection(), model_id)
1261 }
1262
1263 pub fn reconcile(
1264 &self,
1265 limit: Option<u32>,
1266 batch_size: Option<u32>,
1267 ) -> anyhow::Result<ReconcileReport> {
1268 ai::reconcile(self.storage.connection(), limit, batch_size)
1269 }
1270
1271 pub fn reconcile_plan(&self) -> anyhow::Result<ReconcilePlan> {
1272 ai::reconcile_plan(self.storage.connection())
1273 }
1274
1275 pub fn reconcile_with_progress(
1276 &self,
1277 limit: Option<u32>,
1278 batch_size: Option<u32>,
1279 force: bool,
1280 progress: impl FnMut(ai::ReconcileProgress),
1281 ) -> anyhow::Result<ReconcileReport> {
1282 ai::reconcile_with_progress(self.storage.connection(), limit, batch_size, force, progress)
1283 }
1284
1285 pub fn reconcile_with_options_progress(
1286 &self,
1287 options: ai::ReconcileOptions,
1288 progress: impl FnMut(ai::ReconcileProgress),
1289 ) -> anyhow::Result<ReconcileReport> {
1290 ai::reconcile_with_options_progress(self.storage.connection(), options, progress)
1291 }
1292
1293 pub fn gc(&self) -> anyhow::Result<GcReport> {
1298 let mut live_commits = Vec::new();
1299 let mut live_worktrees = Vec::new();
1300 if let Some(root) = self.storage.source_root() {
1301 let (commits, worktrees) = live_worktree_contexts(root);
1302 live_commits.extend(commits);
1303 live_worktrees.extend(worktrees);
1304 }
1305 if !self.active_commit_sha.is_empty() {
1307 live_commits.push(self.active_commit_sha.clone());
1308 }
1309 if !self.active_worktree_id.is_empty() {
1310 live_worktrees.push(self.active_worktree_id.clone());
1311 }
1312 live_commits.sort();
1313 live_commits.dedup();
1314 live_worktrees.sort();
1315 live_worktrees.dedup();
1316 self.prune_to_live(&live_commits, &live_worktrees)
1317 }
1318
1319 pub fn prune_to_live(
1324 &self,
1325 live_commits: &[String],
1326 live_worktrees: &[String],
1327 ) -> anyhow::Result<GcReport> {
1328 let conn = self.storage.connection();
1329 let files_before = table_row_count(conn, "files")?;
1330 let chunks_before = table_row_count(conn, "chunks")?;
1331 if live_commits.is_empty() && live_worktrees.is_empty() {
1332 return Ok(GcReport {
1333 files_pruned: 0,
1334 chunks_pruned: 0,
1335 files_remaining: files_before,
1336 chunks_remaining: chunks_before,
1337 skipped: true,
1338 });
1339 }
1340 conn.execute_batch(
1341 "
1342 CREATE TEMP TABLE IF NOT EXISTS gc_live_commits(sha TEXT PRIMARY KEY);
1343 DELETE FROM temp.gc_live_commits;
1344 CREATE TEMP TABLE IF NOT EXISTS gc_live_worktrees(id TEXT PRIMARY KEY);
1345 DELETE FROM temp.gc_live_worktrees;
1346 CREATE TEMP TABLE IF NOT EXISTS staged_file_ids(id INTEGER PRIMARY KEY);
1347 DELETE FROM temp.staged_file_ids;
1348 ",
1349 )?;
1350 {
1351 let mut stmt =
1352 conn.prepare("INSERT OR IGNORE INTO temp.gc_live_commits(sha) VALUES (?1)")?;
1353 for sha in live_commits {
1354 stmt.execute([sha])?;
1355 }
1356 }
1357 {
1358 let mut stmt =
1359 conn.prepare("INSERT OR IGNORE INTO temp.gc_live_worktrees(id) VALUES (?1)")?;
1360 for id in live_worktrees {
1361 stmt.execute([id])?;
1362 }
1363 }
1364 conn.execute(
1367 "
1368 INSERT OR IGNORE INTO temp.staged_file_ids(id)
1369 SELECT id FROM main.files
1370 WHERE commit_sha NOT IN (SELECT sha FROM temp.gc_live_commits)
1371 AND worktree_id NOT IN (SELECT id FROM temp.gc_live_worktrees)
1372 ",
1373 [],
1374 )?;
1375 self.delete_staged_files_cascade()?;
1376 conn.execute_batch("DELETE FROM temp.staged_file_ids;")?;
1377 let files_remaining = table_row_count(conn, "files")?;
1378 let chunks_remaining = table_row_count(conn, "chunks")?;
1379 Ok(GcReport {
1380 files_pruned: files_before.saturating_sub(files_remaining),
1381 chunks_pruned: chunks_before.saturating_sub(chunks_remaining),
1382 files_remaining,
1383 chunks_remaining,
1384 skipped: false,
1385 })
1386 }
1387
1388 pub fn current_embedding_count(&self, model_id: &str) -> anyhow::Result<u64> {
1389 ai::current_embedding_count(self.storage.connection(), model_id)
1390 }
1391
1392 pub fn heal_index(&self, limit: Option<u32>) -> anyhow::Result<HealIndexReport> {
1393 let Some(root) = self.storage.source_root() else {
1394 anyhow::bail!("heal_index requires source_root metadata; run `rag-rat index` first");
1395 };
1396 let indexed_files = self.indexed_files()?;
1397 let max_repairs = limit.map(usize::try_from).transpose()?.unwrap_or(usize::MAX);
1398 let mut report = HealIndexReport {
1399 checked_files: 0,
1400 healed_files: 0,
1401 removed_files: 0,
1402 skipped_files: 0,
1403 fts_fresh: false,
1404 message: None,
1405 };
1406
1407 for file in indexed_files {
1408 report.checked_files += 1;
1409 let path = Path::new(&file.path);
1410 let full_path = root.join(path);
1411 let Ok(text) = fs::read_to_string(&full_path) else {
1412 if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1413 >= max_repairs
1414 {
1415 report.message =
1416 Some("limit reached; rerun heal_index to continue".to_string());
1417 break;
1418 }
1419 self.mark_file_deleted(path)?;
1420 report.removed_files += 1;
1421 continue;
1422 };
1423 let sha256 = hex_sha256(text.as_bytes());
1424 if sha256 == file.sha256 {
1425 report.skipped_files += 1;
1426 continue;
1427 }
1428 if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1429 >= max_repairs
1430 {
1431 report.message = Some("limit reached; rerun heal_index to continue".to_string());
1432 break;
1433 }
1434 self.heal_file(path)?;
1435 report.healed_files += 1;
1436 }
1437
1438 if report.healed_files > 0 || report.removed_files > 0 {
1439 self.sync_fts()?;
1440 } else {
1441 self.ensure_fts_fresh()?;
1442 }
1443 report.fts_fresh = !self.fts_dirty()?;
1444 Ok(report)
1445 }
1446
1447 pub fn ffi_surface(&self, limit: u32) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1448 crate::query::impact::ffi_surface(self.storage.connection(), limit)
1449 }
1450
1451 pub fn find_callers(
1452 &self,
1453 symbol: &str,
1454 limit: u32,
1455 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1456 crate::query::graph::traverse(self.storage.connection(), symbol, true, limit)
1457 }
1458
1459 pub fn find_callers_with_options(
1460 &self,
1461 symbol: &str,
1462 limit: u32,
1463 options: &crate::query::graph::GraphTraversalOptions,
1464 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1465 let options = self.graph_options_with_logical_group(options)?;
1466 crate::query::graph::traverse_with_options(
1467 self.storage.connection(),
1468 symbol,
1469 true,
1470 limit,
1471 &options,
1472 )
1473 }
1474
1475 pub fn trace_callees(
1476 &self,
1477 symbol: &str,
1478 limit: u32,
1479 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1480 crate::query::graph::traverse(self.storage.connection(), symbol, false, limit)
1481 }
1482
1483 pub fn trace_callees_with_options(
1484 &self,
1485 symbol: &str,
1486 limit: u32,
1487 options: &crate::query::graph::GraphTraversalOptions,
1488 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1489 let options = self.graph_options_with_logical_group(options)?;
1490 crate::query::graph::traverse_with_options(
1491 self.storage.connection(),
1492 symbol,
1493 false,
1494 limit,
1495 &options,
1496 )
1497 }
1498
1499 pub fn graph_traversal_report(
1500 &self,
1501 tool: &str,
1502 symbol: &crate::query::symbol::SymbolHit,
1503 reverse: bool,
1504 limit: u32,
1505 options: &crate::query::graph::GraphTraversalOptions,
1506 ) -> anyhow::Result<crate::query::graph::GraphTraversalReport> {
1507 let options = self.graph_options_with_logical_group(options)?;
1508 let results = crate::query::graph::traverse_with_options(
1509 self.storage.connection(),
1510 &symbol.qualified_name,
1511 reverse,
1512 limit,
1513 &options,
1514 )?;
1515 let summary = crate::query::graph::traversal_summary(
1516 self.storage.connection(),
1517 &symbol.qualified_name,
1518 reverse,
1519 limit,
1520 &options,
1521 results.len(),
1522 )?;
1523 let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1524 let mut paths = BTreeSet::new();
1525 paths.insert(symbol.path.clone());
1526 for result in &results {
1527 if let Some(callsite) = &result.callsite {
1528 paths.insert(callsite.path.clone());
1529 }
1530 }
1531 let mut coverage = self.graph_coverage(paths)?;
1532 if summary.unresolved > 0 {
1533 coverage.known_index_gaps.push(format!(
1534 "{} unresolved qualified callsites match the requested final segment but are not verified to this symbol",
1535 summary.unresolved
1536 ));
1537 }
1538 Ok(crate::query::graph::GraphTraversalReport {
1539 query: crate::query::graph::GraphTraversalQuery {
1540 tool: tool.to_string(),
1541 symbol_id: Some(symbol.symbol_id),
1542 logical_symbol_id: options.logical_symbol_id,
1543 symbol_path: symbol.qualified_name.clone(),
1544 resolution: options.resolution_mode.as_str().to_string(),
1545 },
1546 logical_symbol,
1547 variants,
1548 summary,
1549 coverage,
1550 results,
1551 })
1552 }
1553
1554 pub fn compare_graph_to_text(
1555 &self,
1556 symbol: &crate::query::symbol::SymbolHit,
1557 pattern: &str,
1558 limit: u32,
1559 options: &crate::query::graph::GraphTraversalOptions,
1560 include_tests: bool,
1561 ) -> anyhow::Result<crate::query::graph::CompareGraphTextReport> {
1562 let regex = Regex::new(pattern)?;
1563 let options = self.graph_options_with_logical_group(options)?;
1564 let mut graph_edges = crate::query::graph::traverse_with_options(
1565 self.storage.connection(),
1566 &symbol.qualified_name,
1567 true,
1568 limit,
1569 &options,
1570 )?;
1571 if !include_tests {
1572 graph_edges.retain(|edge| {
1573 edge.callsite.as_ref().is_none_or(|callsite| !is_test_like_path(&callsite.path))
1574 });
1575 }
1576 let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1577 let text_hits = self.regex_hits(pattern, ®ex, include_tests)?;
1578 let text_by_location = text_hits
1579 .iter()
1580 .map(|hit| ((hit.path.clone(), hit.line), hit))
1581 .collect::<BTreeMap<_, _>>();
1582 let graph_by_location = graph_edges
1583 .iter()
1584 .filter_map(|edge| {
1585 edge.callsite
1586 .as_ref()
1587 .map(|callsite| ((callsite.path.clone(), callsite.line), edge))
1588 })
1589 .collect::<BTreeMap<_, _>>();
1590
1591 let mut paths = BTreeSet::new();
1592 paths.insert(symbol.path.clone());
1593 for hit in &text_hits {
1594 paths.insert(hit.path.clone());
1595 }
1596 for edge in &graph_edges {
1597 if let Some(callsite) = &edge.callsite {
1598 paths.insert(callsite.path.clone());
1599 }
1600 }
1601
1602 let parser_failure_paths = self
1603 .parser_failure_paths()?
1604 .into_iter()
1605 .map(|failure| failure.path)
1606 .collect::<BTreeSet<_>>();
1607 let mut matched_hits = Vec::new();
1608 let mut text_only_hits = Vec::new();
1609 let mut likely_parser_gaps = Vec::new();
1610 for hit in &text_hits {
1611 if let Some(edge) = graph_by_location.get(&(hit.path.clone(), hit.line)) {
1612 matched_hits.push(crate::query::graph::MatchedGraphTextHit {
1613 path: hit.path.clone(),
1614 line: hit.line,
1615 text: hit.text.clone(),
1616 target: edge.target.clone(),
1617 edge_kind: edge.edge_kind.clone(),
1618 confidence: edge.confidence.clone(),
1619 resolution: edge.resolution.clone(),
1620 });
1621 } else {
1622 let gap_kind = classify_text_only_hit(&hit.path, &hit.text, &parser_failure_paths);
1623 let text_only_hit = crate::query::graph::TextOnlyHit {
1624 path: hit.path.clone(),
1625 line: hit.line,
1626 text: hit.text.clone(),
1627 reason: if gap_kind == "parser_call_extraction" || gap_kind == "parser_failure"
1628 {
1629 "no graph edge extracted"
1630 } else {
1631 "text mention outside graph-call evidence"
1632 }
1633 .to_string(),
1634 likely_gap: gap_kind.to_string(),
1635 };
1636 if is_likely_parser_gap_kind(gap_kind) {
1637 likely_parser_gaps.push(text_only_hit.clone());
1638 }
1639 text_only_hits.push(text_only_hit);
1640 }
1641 }
1642
1643 let mut graph_only_edges = Vec::new();
1644 let mut likely_false_positives = Vec::new();
1645 for edge in &graph_edges {
1646 let Some(callsite) = &edge.callsite else {
1647 continue;
1648 };
1649 if text_by_location.contains_key(&(callsite.path.clone(), callsite.line)) {
1650 continue;
1651 }
1652 let current_line = self.current_line_text(&callsite.path, callsite.line)?;
1653 let graph_only = crate::query::graph::GraphOnlyEdge {
1654 path: callsite.path.clone(),
1655 line: callsite.line,
1656 target: edge.target.clone(),
1657 edge_kind: edge.edge_kind.clone(),
1658 confidence: edge.confidence.clone(),
1659 resolution: edge.resolution.clone(),
1660 evidence: edge.evidence.clone(),
1661 reason: "graph edge exists but pattern did not match text".to_string(),
1662 likely_reason: graph_only_reason(edge, current_line.as_deref()),
1663 };
1664 if is_likely_false_positive_graph_only(edge, &graph_only) {
1665 likely_false_positives.push(graph_only.clone());
1666 }
1667 graph_only_edges.push(graph_only);
1668 }
1669 let complete = likely_parser_gaps.is_empty() && likely_false_positives.is_empty();
1670 let recommended_fallback =
1671 recommended_graph_text_fallback(&likely_parser_gaps, &graph_only_edges);
1672 let pattern_match_mode = compare_pattern_match_mode(pattern, &symbol.name);
1673 let mut warnings = Vec::new();
1674 if pattern_match_mode == "substring_identifier" {
1675 warnings.push(format!(
1676 "pattern may match identifiers that merely contain `{}`; use an identifier boundary or escaped call suffix for exact text auditing",
1677 symbol.name
1678 ));
1679 }
1680
1681 Ok(crate::query::graph::CompareGraphTextReport {
1682 query: crate::query::graph::CompareGraphTextQuery {
1683 symbol_id: Some(symbol.symbol_id),
1684 logical_symbol_id: options.logical_symbol_id,
1685 symbol_path: symbol.qualified_name.clone(),
1686 pattern: pattern.to_string(),
1687 resolution: options.resolution_mode.as_str().to_string(),
1688 include_tests,
1689 },
1690 logical_symbol,
1691 variants,
1692 summary: crate::query::graph::CompareGraphTextSummary {
1693 graph_hits: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1694 graph_edges: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1695 text_hits: u64::try_from(text_hits.len()).unwrap_or(u64::MAX),
1696 matched: u64::try_from(matched_hits.len()).unwrap_or(u64::MAX),
1697 graph_only: u64::try_from(graph_only_edges.len()).unwrap_or(u64::MAX),
1698 text_only: u64::try_from(text_only_hits.len()).unwrap_or(u64::MAX),
1699 text_mentions: u64::try_from(text_only_hits.len() - likely_parser_gaps.len())
1700 .unwrap_or(u64::MAX),
1701 likely_parser_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1702 likely_false_positives: u64::try_from(likely_false_positives.len())
1703 .unwrap_or(u64::MAX),
1704 likely_index_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1705 complete,
1706 recommended_fallback,
1707 pattern_match_mode,
1708 warnings,
1709 },
1710 coverage: self.graph_coverage(paths)?,
1711 matched_hits,
1712 text_only_hits,
1713 graph_only_edges,
1714 likely_parser_gaps,
1715 likely_false_positives,
1716 })
1717 }
1718
1719 fn graph_logical_symbol(
1720 &self,
1721 logical_symbol_id: Option<i64>,
1722 ) -> anyhow::Result<(
1723 Option<crate::query::graph::LogicalSymbol>,
1724 Vec<crate::query::graph::LogicalSymbolVariant>,
1725 )> {
1726 let Some(logical_symbol_id) = logical_symbol_id else {
1727 return Ok((None, Vec::new()));
1728 };
1729 let Some(logical) = crate::query::symbol::lookup_logical_by_id(
1730 self.storage.connection(),
1731 logical_symbol_id,
1732 )?
1733 else {
1734 return Ok((None, Vec::new()));
1735 };
1736 let variants = crate::query::symbol::logical_members(
1737 self.storage.connection(),
1738 logical.logical_symbol_id,
1739 )?
1740 .into_iter()
1741 .map(|member| crate::query::graph::LogicalSymbolVariant {
1742 symbol_id: member.symbol_id,
1743 cfg_expr: member.cfg_expr,
1744 signature_hash: member.signature_hash,
1745 start_line: member.start_line,
1746 end_line: member.end_line,
1747 })
1748 .collect::<Vec<_>>();
1749 Ok((
1750 Some(crate::query::graph::LogicalSymbol {
1751 logical_symbol_id: logical.logical_symbol_id,
1752 qualified_name: logical.qualified_name,
1753 variant_count: logical.variant_count,
1754 group_reason: logical.group_reason,
1755 }),
1756 variants,
1757 ))
1758 }
1759
1760 fn graph_options_with_logical_group(
1761 &self,
1762 options: &crate::query::graph::GraphTraversalOptions,
1763 ) -> anyhow::Result<crate::query::graph::GraphTraversalOptions> {
1764 if options.logical_symbol_id.is_some() {
1765 return Ok(options.clone());
1766 }
1767 let Some(symbol_id) = options.symbol_id else {
1768 return Ok(options.clone());
1769 };
1770 let Some(logical) =
1771 crate::query::symbol::logical_for_symbol_id(self.storage.connection(), symbol_id)?
1772 else {
1773 return Ok(options.clone());
1774 };
1775 let mut options = options.clone();
1776 options.logical_symbol_id = Some(logical.logical_symbol_id);
1777 Ok(options)
1778 }
1779
1780 fn local_symbol_context_hits(
1781 &self,
1782 symbol: &crate::query::symbol::SymbolHit,
1783 limit: u32,
1784 ) -> anyhow::Result<Vec<SearchHit>> {
1785 let mut stmt = self.storage.connection().prepare(
1786 "
1787 SELECT chunks.id, files.path, files.language, files.kind,
1788 chunks.start_line, chunks.end_line, chunks.symbol_path, chunks.text
1789 FROM chunks
1790 JOIN files ON files.id = chunks.file_id
1791 WHERE files.path = ?1
1792 AND (
1793 chunks.symbol_path = ?2
1794 OR chunks.symbol_path LIKE ?3
1795 OR chunks.text LIKE ?4
1796 )
1797 ORDER BY
1798 CASE
1799 WHEN chunks.symbol_path = ?2 THEN 0
1800 WHEN chunks.symbol_path LIKE ?3 THEN 1
1801 ELSE 2
1802 END,
1803 chunks.start_line
1804 LIMIT ?5
1805 ",
1806 )?;
1807 let rows = stmt.query_map(
1808 params![
1809 symbol.path,
1810 symbol.qualified_name,
1811 format!("%{}%", symbol.name),
1812 format!("%{}%", symbol.name),
1813 i64::from(limit.max(1)),
1814 ],
1815 |row| {
1816 let text: String = row.get(7)?;
1817 Ok(SearchHit {
1818 chunk_id: row.get(0)?,
1819 path: row.get(1)?,
1820 language: row.get(2)?,
1821 kind: row.get(3)?,
1822 start_line: row.get(4)?,
1823 end_line: row.get(5)?,
1824 symbol_path: row.get(6)?,
1825 score: 1.0,
1826 summary: bounded_summary(&text),
1827 graph: None,
1828 score_components: None,
1829 })
1830 },
1831 )?;
1832 let mut hits = Vec::new();
1833 for row in rows {
1834 hits.push(row?);
1835 }
1836 Ok(hits)
1837 }
1838
1839 pub fn impact_surface(
1840 &self,
1841 query: &str,
1842 limit: u32,
1843 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1844 crate::query::impact::impact_surface(self.storage.connection(), query, limit)
1845 }
1846
1847 pub fn impact_surface_with_options(
1848 &self,
1849 query: &str,
1850 limit: u32,
1851 resolution_mode: crate::query::graph::GraphResolutionMode,
1852 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1853 crate::query::impact::impact_surface_with_options(
1854 self.storage.connection(),
1855 query,
1856 limit,
1857 resolution_mode,
1858 )
1859 }
1860
1861 pub fn impact_surface_for_selected_symbol(
1862 &self,
1863 symbol: &crate::query::symbol::SymbolHit,
1864 limit: u32,
1865 resolution_mode: crate::query::graph::GraphResolutionMode,
1866 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1867 crate::query::impact::impact_surface_for_symbol(
1868 self.storage.connection(),
1869 symbol,
1870 limit,
1871 resolution_mode,
1872 )
1873 }
1874
1875 pub fn impact_surface_report_for_selected_symbol(
1876 &self,
1877 symbol: &crate::query::symbol::SymbolHit,
1878 limit: u32,
1879 options: &crate::query::impact::ImpactSurfaceOptions,
1880 ) -> anyhow::Result<crate::query::impact::ImpactSurfaceReport> {
1881 crate::query::impact::impact_surface_report_for_symbol(
1882 self.storage.connection(),
1883 symbol,
1884 limit,
1885 options,
1886 )
1887 }
1888
1889 pub fn repo_brief(
1890 &self,
1891 options: crate::query::repo_brief::RepoBriefOptions,
1892 ) -> anyhow::Result<crate::query::repo_brief::RepoBrief> {
1893 crate::query::repo_brief::repo_brief(self.storage.connection(), options)
1894 }
1895
1896 pub fn repo_clusters(
1897 &self,
1898 options: crate::query::clusters::RepoClustersOptions,
1899 ) -> anyhow::Result<crate::query::clusters::RepoClustersReport> {
1900 crate::query::clusters::repo_clusters(self.storage.connection(), options)
1901 }
1902
1903 pub fn memory_create(
1904 &self,
1905 request: crate::query::memory::RepoMemoryCreate,
1906 ) -> anyhow::Result<crate::query::memory::RepoMemoryCreateResult> {
1907 crate::query::memory::create_memory(self.storage.connection(), request)
1908 }
1909
1910 pub fn memory_update(
1911 &self,
1912 update: crate::query::memory::RepoMemoryUpdate,
1913 ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1914 crate::query::memory::update_memory(self.storage.connection(), update)
1915 }
1916
1917 pub fn memory_mark_obsolete(
1918 &self,
1919 memory_id: &str,
1920 ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1921 crate::query::memory::mark_obsolete(self.storage.connection(), memory_id)
1922 }
1923
1924 pub fn memory_search(
1925 &self,
1926 query: &str,
1927 limit: u32,
1928 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1929 crate::query::memory::memory_search(self.storage.connection(), query, limit)
1930 }
1931
1932 pub fn memory_for_symbol(
1933 &self,
1934 symbol: &crate::query::symbol::SymbolHit,
1935 limit: u32,
1936 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1937 crate::query::memory::memories_for_symbol(self.storage.connection(), symbol, limit)
1938 }
1939
1940 pub fn memory_for_path(
1941 &self,
1942 path: &str,
1943 limit: u32,
1944 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1945 crate::query::memory::memories_for_path(self.storage.connection(), path, limit)
1946 }
1947
1948 pub fn memory_for_edges(
1949 &self,
1950 edge_ids: &[i64],
1951 limit: u32,
1952 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1953 crate::query::memory::memories_for_edges(self.storage.connection(), edge_ids, limit)
1954 }
1955
1956 pub fn memory_evidence_for_symbol_and_edges(
1957 &self,
1958 symbol: &crate::query::symbol::SymbolHit,
1959 edge_ids: &[i64],
1960 limit: u32,
1961 ) -> anyhow::Result<crate::query::memory::RepoMemoryEvidence> {
1962 crate::query::memory::memory_evidence_for_symbol_and_edges(
1963 self.storage.connection(),
1964 symbol,
1965 edge_ids,
1966 limit,
1967 )
1968 }
1969
1970 pub fn memory_for_call_path_hash(
1971 &self,
1972 edge_sequence_hash: &str,
1973 limit: u32,
1974 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1975 crate::query::memory::memories_for_call_path_hash(
1976 self.storage.connection(),
1977 edge_sequence_hash,
1978 limit,
1979 )
1980 }
1981
1982 pub fn memory_validate(
1983 &self,
1984 ) -> anyhow::Result<crate::query::memory::RepoMemoryValidationReport> {
1985 crate::query::memory::validate_memories(self.storage.connection())
1986 }
1987
1988 pub fn rebuild_fts(&self) -> anyhow::Result<()> {
1989 schema::rebuild_fts(self.storage.connection())?;
1990 self.record_content_revision()?;
1991 self.record_fts_current()?;
1992 self.set_meta("fts_dirty", "false")?;
1993 Ok(())
1994 }
1995
1996 pub fn sync_fts(&self) -> anyhow::Result<()> {
1997 self.record_content_revision()?;
1998 self.record_fts_current()?;
1999 self.set_meta("fts_dirty", "false")?;
2000 Ok(())
2001 }
2002
2003 fn record_fts_current(&self) -> anyhow::Result<()> {
2004 self.set_meta("fts_synced_at_ms", &now_ms().to_string())?;
2005 let revision = self.content_revision()?;
2006 self.set_meta("fts_source_revision", &revision)?;
2007 Ok(())
2008 }
2009
2010 fn record_content_revision(&self) -> anyhow::Result<String> {
2011 let revision = self.content_revision()?;
2012 self.set_meta("content_revision", &revision)?;
2013 Ok(revision)
2014 }
2015
2016 pub fn heal_file(&self, path: &Path) -> anyhow::Result<()> {
2017 let Some(root) = self.storage.source_root() else {
2018 anyhow::bail!("index has no source_root metadata; rebuild required");
2019 };
2020 let row = self.file_row(path)?;
2021 let full_path = root.join(path);
2022 let text = fs::read_to_string(&full_path)?;
2023
2024 let changes = git_changed_paths(root).unwrap_or_default();
2025 let is_dirty = changes.changed.contains(path);
2026 let has_base_commit = !self.active_commit_sha.is_empty();
2027 let scope = if !has_base_commit || is_dirty {
2028 FileScope::worktree(self.active_worktree_id.clone())
2029 } else {
2030 FileScope::commit(self.active_commit_sha.clone())
2031 };
2032 self.remove_file_in_scope(path, &scope.commit_sha, &scope.worktree_id)?;
2033
2034 self.index_file(
2035 path,
2036 row.language,
2037 row.kind,
2038 file_metadata_ms(&full_path)?,
2039 &text,
2040 &scope,
2041 )?;
2042 self.rebuild_logical_symbols()?;
2043 self.resolve_edges()
2044 }
2045
2046 fn index_file(
2047 &self,
2048 path: &Path,
2049 language: Language,
2050 kind: TargetKind,
2051 modified_at_ms: i64,
2052 text: &str,
2053 scope: &FileScope,
2054 ) -> anyhow::Result<()> {
2055 if language != Language::Markdown && kind != TargetKind::Generated {
2056 if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
2057 } else if let Some(message) = parser::parse_error(path, language, text)
2060 .unwrap_or_else(|err| Some(err.to_string()))
2061 {
2062 self.insert_parser_failure(path, language, &message)?;
2063 }
2064 }
2065 let sha256 = hex_sha256(text.as_bytes());
2066 let file_id = self.storage.connection().query_row(
2067 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2068 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
2069 RETURNING id",
2070 params![
2071 path_string(path),
2072 language.as_str(),
2073 kind.as_str(),
2074 sha256,
2075 modified_at_ms,
2076 matches!(kind, TargetKind::Generated),
2077 now_ms(),
2078 sha256,
2079 &scope.commit_sha,
2080 &scope.worktree_id,
2081 ],
2082 |row| row.get::<_, i64>(0),
2083 )?;
2084 let chunks = if kind == TargetKind::Generated {
2085 chunker::generated_chunks_for_file(path, text)
2086 } else {
2087 chunker::chunks_for_file(path, language, text)
2088 };
2089 let symbols =
2090 if kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
2091 Vec::new()
2092 } else {
2093 symbols::symbols_for_file(path, language, text)
2094 };
2095 self.insert_chunks(file_id, &sha256, &chunks, text)?;
2096 self.insert_symbols(file_id, language, &symbols)?;
2097 if kind != TargetKind::Generated && text.len() <= edges::MAX_GRAPH_PARSE_BYTES {
2098 edges::index_file_edges(self.storage.connection(), file_id, path, language, text)?;
2099 }
2100 self.mark_fts_dirty()?;
2101 Ok(())
2102 }
2103
2104 fn insert_prepared_file(&self, prepared_file: &PreparedIndexFile) -> anyhow::Result<()> {
2105 let file = &prepared_file.file;
2106 let prepared = match &prepared_file.prepared {
2107 Ok(prepared) => prepared,
2108 Err(err) => {
2109 self.insert_parser_failure(&file.relative_path, file.language, &err.to_string())?;
2110 return Ok(());
2111 },
2112 };
2113 if let Some(message) = &prepared.parser_failure {
2114 self.insert_parser_failure(&file.relative_path, file.language, message)?;
2115 }
2116 let file_id = self.storage.connection().query_row(
2117 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2118 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
2119 RETURNING id",
2120 params![
2121 path_string(&file.relative_path),
2122 file.language.as_str(),
2123 file.kind.as_str(),
2124 prepared.sha256,
2125 prepared.modified_at_ms,
2126 matches!(file.kind, TargetKind::Generated),
2127 now_ms(),
2128 prepared.sha256,
2129 file.commit_sha,
2130 file.worktree_id,
2131 ],
2132 |row| row.get::<_, i64>(0),
2133 )?;
2134 self.insert_chunks(file_id, &prepared.sha256, &prepared.chunks, &prepared.text)?;
2135 self.insert_symbols(file_id, file.language, &prepared.symbols)?;
2136 if file.kind != TargetKind::Generated && prepared.text.len() <= edges::MAX_GRAPH_PARSE_BYTES
2137 {
2138 edges::index_file_edges(
2139 self.storage.connection(),
2140 file_id,
2141 &file.relative_path,
2142 file.language,
2143 &prepared.text,
2144 )?;
2145 }
2146 self.mark_fts_dirty()?;
2147 Ok(())
2148 }
2149
2150 fn insert_chunks(
2151 &self,
2152 file_id: i64,
2153 source_revision: &str,
2154 chunks: &[Chunk],
2155 full_text: &str,
2156 ) -> anyhow::Result<()> {
2157 let (path, language, kind) = self.storage.connection().query_row(
2158 "SELECT path, language, kind FROM main.files WHERE id = ?1",
2159 [file_id],
2160 |row| {
2161 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?))
2162 },
2163 )?;
2164 for chunk in chunks {
2165 let anchor =
2166 anchors::anchor_for_text(&chunk.text, chunk.start_line, chunk.end_line, full_text);
2167 let embedding_policy = ai::embedding_policy_for_chunk(
2168 Path::new(&path),
2169 &language,
2170 &kind,
2171 chunk.kind,
2172 chunk.symbol_path.as_deref(),
2173 &chunk.text,
2174 ai::DEFAULT_MAX_EMBEDDING_CHARS,
2175 );
2176 self.storage.connection().execute(
2177 "INSERT INTO chunks(file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line, text, text_hash,
2178 source_revision, anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2179 start_context_hash, end_context_hash, context_radius, embedding_policy, embedding_priority)
2180 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19)",
2181 params![
2182 file_id,
2183 chunk.kind,
2184 chunk.symbol_path,
2185 i64::try_from(chunk.start_byte)?,
2186 i64::try_from(chunk.end_byte)?,
2187 i64::try_from(chunk.start_line)?,
2188 i64::try_from(chunk.end_line)?,
2189 chunk.text,
2190 hex_sha256(chunk.text.as_bytes()),
2191 source_revision,
2192 anchor.version,
2193 anchor.normalized_hash,
2194 anchor.start_boundary_hash,
2195 anchor.end_boundary_hash,
2196 anchor.start_context_hash,
2197 anchor.end_context_hash,
2198 anchor.context_radius,
2199 embedding_policy.policy,
2200 embedding_policy.priority,
2201 ],
2202 )?;
2203 let chunk_id = self.storage.connection().last_insert_rowid();
2204 self.storage.connection().execute(
2205 "INSERT INTO chunk_fts(rowid, text) VALUES (?1, ?2)",
2206 params![chunk_id, chunk.text],
2207 )?;
2208 }
2209 Ok(())
2210 }
2211
2212 fn insert_symbols(
2213 &self,
2214 file_id: i64,
2215 language: Language,
2216 symbols: &[Symbol],
2217 ) -> anyhow::Result<()> {
2218 for symbol in symbols {
2219 self.storage.connection().execute(
2220 "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs)
2221 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
2222 params![
2223 file_id,
2224 language.as_str(),
2225 symbol.name,
2226 symbol.qualified_name,
2227 symbol.kind,
2228 i64::try_from(symbol.start_byte)?,
2229 i64::try_from(symbol.end_byte)?,
2230 symbol.signature,
2231 symbol.docs,
2232 ],
2233 )?;
2234 let symbol_id = self.storage.connection().last_insert_rowid();
2235 for fact in &symbol.facts {
2236 self.storage.connection().execute(
2237 "INSERT OR IGNORE INTO symbol_facts(symbol_id, fact_kind, fact_value)
2238 VALUES (?1, ?2, ?3)",
2239 params![symbol_id, fact.kind, fact.value],
2240 )?;
2241 }
2242 }
2243 Ok(())
2244 }
2245
2246 fn write_git_meta(&self, root: &Path) -> anyhow::Result<()> {
2247 self.set_meta("git_commit", &git_output(root, &["rev-parse", "HEAD"]).unwrap_or_default())?;
2248 let dirty = !git_output(root, &["status", "--porcelain"]).unwrap_or_default().is_empty();
2249 self.set_meta("git_dirty", if dirty { "true" } else { "false" })?;
2250 Ok(())
2251 }
2252
2253 fn apply_prepared_git_history(
2254 &self,
2255 root: &Path,
2256 handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
2257 ) -> anyhow::Result<GitHistoryIndexStatus> {
2258 let prepared = join_git_history_prepare(handle)?;
2259 git_history::apply_prepared(self.storage.connection(), root, prepared)
2260 }
2261
2262 fn git_history_status(&self) -> anyhow::Result<GitHistoryIndexStatus> {
2263 let Some(root) = self.storage.source_root() else {
2264 return git_history::status(self.storage.connection(), Path::new("."));
2265 };
2266 git_history::status(self.storage.connection(), root)
2267 }
2268
2269 fn github_status(&self) -> anyhow::Result<GitHubStatus> {
2270 github::status(self.storage.connection())
2271 }
2272
2273 fn mark_fts_dirty(&self) -> anyhow::Result<()> {
2274 self.set_meta("fts_dirty", "true")
2275 }
2276
2277 fn resolve_edges(&self) -> anyhow::Result<()> {
2278 edges::resolve_all_edges(self.storage.connection())
2279 }
2280
    /// Rebuilds `logical_symbols` and `logical_symbol_members` from scratch.
    ///
    /// All existing rows are deleted, every row of `symbols` is read back together
    /// with the smallest chunk that contains the symbol's start byte, and symbols
    /// that share the same [`LogicalSymbolKey`] are grouped into one logical symbol.
    /// Line numbers of each member are derived from the enclosing chunk's text.
    fn rebuild_logical_symbols(&self) -> anyhow::Result<()> {
        // Members are deleted before their parents.
        self.storage.connection().execute_batch(
            "
            DELETE FROM main.logical_symbol_members;
            DELETE FROM main.logical_symbols;
            ",
        )?;

        // Each correlated subquery picks the smallest chunk (by byte length) whose
        // range contains the symbol's start byte; the COALESCE fallbacks apply when
        // no chunk covers the symbol.
        let mut stmt = self.storage.connection().prepare(
            "
            SELECT symbols.id, symbols.file_id, files.path, symbols.language, symbols.name,
                   symbols.qualified_name, symbols.kind, symbols.start_byte, symbols.end_byte,
                   symbols.signature,
                   COALESCE((
                       SELECT chunks.start_byte
                       FROM chunks
                       WHERE chunks.file_id = symbols.file_id
                         AND symbols.start_byte >= chunks.start_byte
                         AND symbols.start_byte < chunks.end_byte
                       ORDER BY chunks.end_byte - chunks.start_byte ASC
                       LIMIT 1
                   ), symbols.start_byte) AS chunk_start_byte,
                   COALESCE((
                       SELECT chunks.start_line
                       FROM chunks
                       WHERE chunks.file_id = symbols.file_id
                         AND symbols.start_byte >= chunks.start_byte
                         AND symbols.start_byte < chunks.end_byte
                       ORDER BY chunks.end_byte - chunks.start_byte ASC
                       LIMIT 1
                   ), 1) AS chunk_start_line,
                   COALESCE((
                       SELECT chunks.text
                       FROM chunks
                       WHERE chunks.file_id = symbols.file_id
                         AND symbols.start_byte >= chunks.start_byte
                         AND symbols.start_byte < chunks.end_byte
                       ORDER BY chunks.end_byte - chunks.start_byte ASC
                       LIMIT 1
                   ), '') AS chunk_text
            FROM symbols
            JOIN files ON files.id = symbols.file_id
            ORDER BY files.path, symbols.language, symbols.qualified_name, symbols.kind,
                     symbols.start_byte, symbols.end_byte
            ",
        )?;
        let rows = stmt.query_map([], |row| {
            // Negative stored offsets fall back to 0 (or to the symbol start for the chunk).
            let start_byte = usize::try_from(row.get::<_, i64>(7)?).unwrap_or(0);
            let end_byte = usize::try_from(row.get::<_, i64>(8)?).unwrap_or(0);
            let chunk_start_byte = usize::try_from(row.get::<_, i64>(10)?).unwrap_or(start_byte);
            let chunk_start_line = row.get::<_, i64>(11)?;
            let chunk_text: String = row.get(12)?;
            // Translate byte offsets into line numbers relative to the enclosing chunk.
            let start_line =
                symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, start_byte);
            let end_line =
                symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, end_byte);
            Ok(LogicalSymbolMemberRow {
                symbol_id: row.get(0)?,
                path: row.get(2)?,
                language: row.get(3)?,
                name: row.get(4)?,
                qualified_name: row.get(5)?,
                kind: row.get(6)?,
                signature: row.get(9)?,
                start_line,
                end_line,
            })
        })?;
        // Group members by key; the BTreeMap makes the insertion order below deterministic.
        let mut groups: BTreeMap<LogicalSymbolKey, Vec<LogicalSymbolMemberRow>> = BTreeMap::new();
        for row in rows {
            let row = row?;
            groups.entry(LogicalSymbolKey::from(&row)).or_default().push(row);
        }
        for (key, members) in groups {
            // More than one physical symbol behind the same key is labelled "cfg_variant".
            let group_reason = if members.len() > 1 { "cfg_variant" } else { "single" };
            let logical_symbol_id = key.stable_id();
            self.storage.connection().execute(
                "
                INSERT INTO logical_symbols(id, language, path, logical_name, qualified_name, kind, variant_count, group_reason)
                VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
                ",
                params![
                    logical_symbol_id,
                    key.language,
                    key.path,
                    key.name,
                    key.qualified_name,
                    key.kind,
                    i64::try_from(members.len()).unwrap_or(i64::MAX),
                    group_reason,
                ],
            )?;
            for member in members {
                // Only a hash of the signature is stored on the member row; `cfg_expr` is
                // always written as NULL here.
                let signature_hash =
                    member.signature.as_deref().map(|signature| hex_sha256(signature.as_bytes()));
                self.storage.connection().execute(
                    "
                    INSERT INTO logical_symbol_members(
                        logical_symbol_id, symbol_id, cfg_expr, signature_hash, start_line, end_line
                    )
                    VALUES (?1, ?2, NULL, ?3, ?4, ?5)
                    ",
                    params![
                        logical_symbol_id,
                        member.symbol_id,
                        signature_hash,
                        member.start_line,
                        member.end_line,
                    ],
                )?;
            }
        }
        Ok(())
    }
2400
2401 fn graph_coverage(
2402 &self,
2403 paths: BTreeSet<String>,
2404 ) -> anyhow::Result<crate::query::graph::GraphCoverage> {
2405 let indexed_files =
2406 self.storage
2407 .connection()
2408 .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2409 let parser_failure_paths = self.parser_failure_paths()?;
2410 let parser_failures = u64::try_from(parser_failure_paths.len()).unwrap_or(0);
2411 let known_index_gaps = parser_failure_paths
2412 .iter()
2413 .map(|failure| {
2414 format!(
2415 "{} parser failed for {}: {}",
2416 failure.language, failure.path, failure.message
2417 )
2418 })
2419 .collect::<Vec<_>>();
2420 let mut stale_files = 0_u64;
2421 let mut parser_coverage_for_paths = Vec::new();
2422 for path in paths {
2423 let Some(row) = self.graph_path_row(&path)? else {
2424 parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2425 path,
2426 language: "unknown".to_string(),
2427 parser_status: "missing_from_index".to_string(),
2428 graph_status: "missing_from_index".to_string(),
2429 last_indexed_revision: None,
2430 });
2431 continue;
2432 };
2433 let stale = self.source_path_is_stale(&path, &row.sha256);
2434 if stale {
2435 stale_files += 1;
2436 }
2437 let parser_failed = parser_failure_paths.iter().any(|failure| failure.path == path);
2438 parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2439 path,
2440 language: row.language,
2441 parser_status: if parser_failed { "failed" } else { "ok" }.to_string(),
2442 graph_status: if stale {
2443 "stale_source"
2444 } else if parser_failed {
2445 "parser_failed"
2446 } else {
2447 "ok"
2448 }
2449 .to_string(),
2450 last_indexed_revision: (!row.indexed_revision.is_empty())
2451 .then_some(row.indexed_revision),
2452 });
2453 }
2454 Ok(crate::query::graph::GraphCoverage {
2455 indexed_files: u64::try_from(indexed_files).unwrap_or(0),
2456 parser_failures,
2457 stale_files,
2458 known_index_gaps,
2459 parser_coverage_for_paths,
2460 })
2461 }
2462
2463 fn graph_path_row(&self, path: &str) -> anyhow::Result<Option<GraphPathRow>> {
2464 self.storage
2465 .connection()
2466 .query_row(
2467 "SELECT language, sha256, indexed_revision FROM files WHERE path = ?1",
2468 [path],
2469 |row| {
2470 Ok(GraphPathRow {
2471 language: row.get(0)?,
2472 sha256: row.get(1)?,
2473 indexed_revision: row.get(2)?,
2474 })
2475 },
2476 )
2477 .optional()
2478 .map_err(Into::into)
2479 }
2480
2481 fn source_path_is_stale(&self, path: &str, indexed_sha256: &str) -> bool {
2482 let Some(root) = self.storage.source_root() else {
2483 return false;
2484 };
2485 let Ok(bytes) = fs::read(root.join(path)) else {
2486 return true;
2487 };
2488 hex_sha256(&bytes) != indexed_sha256
2489 }
2490
2491 fn regex_hits(
2492 &self,
2493 pattern: &str,
2494 regex: &Regex,
2495 include_tests: bool,
2496 ) -> anyhow::Result<Vec<crate::query::graph::TextOnlyHit>> {
2497 let Some(root) = self.storage.source_root() else {
2498 anyhow::bail!("cannot compare graph to text: source_root is missing from index_meta");
2499 };
2500 let mut stmt = self.storage.connection().prepare("SELECT path FROM files ORDER BY path")?;
2501 let paths =
2502 stmt.query_map([], |row| row.get::<_, String>(0))?.collect::<Result<Vec<_>, _>>()?;
2503 let mut hits = Vec::new();
2504 for path in paths {
2505 if !include_tests && is_test_like_path(&path) {
2506 continue;
2507 }
2508 let full_path = root.join(&path);
2509 let Ok(text) = fs::read_to_string(&full_path) else {
2510 continue;
2511 };
2512 for (index, line) in text.lines().enumerate() {
2513 if regex.is_match(line) {
2514 hits.push(crate::query::graph::TextOnlyHit {
2515 path: path.clone(),
2516 line: i64::try_from(index + 1).unwrap_or(i64::MAX),
2517 text: line.trim().to_string(),
2518 reason: "text pattern matched".to_string(),
2519 likely_gap: pattern.to_string(),
2520 });
2521 }
2522 }
2523 }
2524 Ok(hits)
2525 }
2526
2527 fn current_line_text(&self, path: &str, line: i64) -> anyhow::Result<Option<String>> {
2528 let Some(root) = self.storage.source_root() else {
2529 return Ok(None);
2530 };
2531 let Ok(text) = fs::read_to_string(root.join(path)) else {
2532 return Ok(None);
2533 };
2534 let Some(index) = usize::try_from(line.saturating_sub(1)).ok() else {
2535 return Ok(None);
2536 };
2537 Ok(text.lines().nth(index).map(|line| line.trim().to_string()))
2538 }
2539
2540 fn ensure_graph_index_current(&self) -> anyhow::Result<()> {
2541 if self.meta("graph_index_version")?.as_deref() == Some(GRAPH_INDEX_VERSION) {
2542 return Ok(());
2543 }
2544 let Some(root) = self.storage.source_root().map(Path::to_path_buf) else {
2545 return Ok(());
2546 };
2547 self.storage.execute_batch("BEGIN IMMEDIATE TRANSACTION")?;
2548 let result = (|| -> anyhow::Result<()> {
2549 self.storage.connection().execute("DELETE FROM edges", [])?;
2550 let files = self.graph_reindex_files()?;
2551 for file in files {
2552 if file.kind == TargetKind::Generated || file.language == Language::Markdown {
2553 continue;
2554 }
2555 let full_path = root.join(&file.path);
2556 let Ok(text) = fs::read_to_string(full_path) else {
2557 continue;
2558 };
2559 if text.len() > edges::MAX_GRAPH_PARSE_BYTES {
2560 continue;
2561 }
2562 edges::index_file_edges(
2563 self.storage.connection(),
2564 file.id,
2565 Path::new(&file.path),
2566 file.language,
2567 &text,
2568 )?;
2569 }
2570 self.resolve_edges()?;
2571 self.mark_graph_index_current()?;
2572 Ok(())
2573 })();
2574 if result.is_err() {
2575 let _ = self.storage.execute_batch("ROLLBACK");
2576 }
2577 result?;
2578 self.storage.execute_batch("COMMIT")?;
2579 Ok(())
2580 }
2581
2582 fn mark_graph_index_current(&self) -> anyhow::Result<()> {
2583 self.set_meta("graph_index_version", GRAPH_INDEX_VERSION)
2584 }
2585
2586 fn set_meta(&self, key: &str, value: &str) -> anyhow::Result<()> {
2587 self.storage.connection().execute(
2588 "INSERT INTO index_meta(key, value) VALUES (?1, ?2)
2589 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
2590 params![key, value],
2591 )?;
2592 Ok(())
2593 }
2594
2595 fn meta(&self, key: &str) -> anyhow::Result<Option<String>> {
2596 meta_for(self.storage.connection(), key)
2597 }
2598
2599 fn insert_parser_failure(
2600 &self,
2601 path: &Path,
2602 language: Language,
2603 message: &str,
2604 ) -> anyhow::Result<()> {
2605 self.storage.connection().execute(
2606 "INSERT INTO parser_failures(path, language, message) VALUES (?1, ?2, ?3)",
2607 params![path_string(path), language.as_str(), message],
2608 )?;
2609 Ok(())
2610 }
2611
2612 fn parser_failure_count(&self) -> anyhow::Result<u64> {
2613 let count = self.storage.connection().query_row(
2614 "SELECT COUNT(*) FROM parser_failures",
2615 [],
2616 |row| row.get::<_, i64>(0),
2617 )?;
2618 Ok(u64::try_from(count).unwrap_or(0))
2619 }
2620
2621 fn parser_failure_paths(&self) -> anyhow::Result<Vec<ParserFailure>> {
2622 let mut stmt = self.storage.connection().prepare(
2623 "SELECT path, language, message FROM parser_failures ORDER BY path, language, message",
2624 )?;
2625 let rows = stmt.query_map([], |row| {
2626 Ok(ParserFailure { path: row.get(0)?, language: row.get(1)?, message: row.get(2)? })
2627 })?;
2628 let mut failures = Vec::new();
2629 for row in rows {
2630 failures.push(row?);
2631 }
2632 Ok(failures)
2633 }
2634
2635 fn search_with_heal(
2636 &self,
2637 query: &str,
2638 limit: u32,
2639 include_generated: bool,
2640 allow_heal: bool,
2641 explain: bool,
2642 options: SearchOptions,
2643 ) -> anyhow::Result<Vec<SearchHit>> {
2644 let hits = crate::search::lexical::search_with_options(
2645 self.storage.connection(),
2646 query,
2647 limit,
2648 include_generated,
2649 explain,
2650 options,
2651 )?;
2652 if !allow_heal {
2653 return Ok(hits);
2654 }
2655 let stale = self.stale_hit_paths(&hits)?;
2656 if stale.is_empty() {
2657 return Ok(hits);
2658 }
2659 if stale.len() > MAX_AUTO_HEAL_FILES_PER_CALL {
2660 anyhow::bail!(IndexError::NeedsReindex {
2661 stale_files: stale.len(),
2662 cap: MAX_AUTO_HEAL_FILES_PER_CALL,
2663 });
2664 }
2665 for path in stale {
2666 self.heal_file(Path::new(&path))?;
2667 }
2668 self.sync_fts()?;
2669 self.search_with_heal(query, limit, include_generated, false, explain, options)
2670 }
2671
2672 fn stale_hit_paths(&self, hits: &[SearchHit]) -> anyhow::Result<Vec<String>> {
2673 let Some(root) = self.storage.source_root() else {
2674 return Ok(Vec::new());
2675 };
2676 let mut stale = Vec::new();
2677 let mut seen = BTreeSet::new();
2678 for hit in hits {
2679 if !seen.insert(hit.path.clone()) {
2680 continue;
2681 }
2682 let source_path = root.join(&hit.path);
2683 let Ok(text) = fs::read_to_string(source_path) else {
2684 stale.push(hit.path.clone());
2685 continue;
2686 };
2687 let chunk = crate::query::read_chunk(self.storage.connection(), hit.chunk_id)?;
2688 let Some(chunk) = chunk else {
2689 stale.push(hit.path.clone());
2690 continue;
2691 };
2692 let anchor = self.chunk_anchor(hit.chunk_id)?;
2693 let status = anchors::validate(
2694 &chunk.text,
2695 usize::try_from(chunk.start_line).unwrap_or(1),
2696 usize::try_from(chunk.end_line).unwrap_or(1),
2697 &anchor,
2698 &text,
2699 );
2700 if !matches!(status, AnchorStatus::Exact) {
2701 stale.push(hit.path.clone());
2702 }
2703 }
2704 Ok(stale)
2705 }
2706
2707 fn chunk_anchor(&self, chunk_id: i64) -> anyhow::Result<ChunkAnchor> {
2708 Ok(self.storage.connection().query_row(
2709 "
2710 SELECT anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2711 start_context_hash, end_context_hash, context_radius
2712 FROM chunks WHERE id = ?1
2713 ",
2714 [chunk_id],
2715 |row| {
2716 Ok(ChunkAnchor {
2717 version: row.get(0)?,
2718 normalized_hash: row.get(1)?,
2719 start_boundary_hash: row.get(2)?,
2720 end_boundary_hash: row.get(3)?,
2721 start_context_hash: row.get(4)?,
2722 end_context_hash: row.get(5)?,
2723 context_radius: row.get(6)?,
2724 })
2725 },
2726 )?)
2727 }
2728
2729 fn mark_file_deleted(&self, path: &Path) -> anyhow::Result<()> {
2730 let path = path_string(path);
2731 self.remove_file_in_scope(Path::new(&path), "", &self.active_worktree_id)?;
2732 self.storage.connection().execute(
2733 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2734 VALUES (?1, 'unknown', 'deleted', '', 0, 0, ?2, '', '', ?3)
2735 ON CONFLICT(path, commit_sha, worktree_id) DO UPDATE SET
2736 kind = 'deleted',
2737 sha256 = '',
2738 modified_at_ms = 0,
2739 indexed_at_ms = excluded.indexed_at_ms",
2740 params![path, now_ms(), self.active_worktree_id],
2741 )?;
2742 self.mark_fts_dirty()?;
2743 Ok(())
2744 }
2745
    /// Removes all index data for `path` within one scope (`commit_sha` + `worktree_id`).
    ///
    /// The statements run in dependency order: edges first, then FTS rows, chunks,
    /// symbols, and finally the `main.files` row itself, because the earlier
    /// statements locate their rows through the tables deleted later. The FTS index is
    /// flagged dirty at the end.
    fn remove_file_in_scope(
        &self,
        path: &Path,
        commit_sha: &str,
        worktree_id: &str,
    ) -> anyhow::Result<()> {
        let path = path_string(path);
        // Edges that point INTO this file's symbols are kept but detached: the target
        // id is cleared and the confidence is downgraded to 'NameOnly'.
        self.storage.connection().execute(
            "UPDATE edges
             SET to_symbol_id = NULL,
                 confidence = 'NameOnly'
             WHERE to_symbol_id IN (
                 SELECT symbols.id FROM symbols
                 JOIN main.files ON main.files.id = symbols.file_id
                 WHERE main.files.path = ?1
                   AND main.files.commit_sha = ?2
                   AND main.files.worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        // Edges that originate from this file (by source file or by source symbol) are deleted.
        self.storage.connection().execute(
            "DELETE FROM edges
             WHERE source_file_id IN (
                 SELECT id FROM main.files
                 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
             )
             OR from_symbol_id IN (
                 SELECT symbols.id FROM symbols
                 JOIN main.files ON main.files.id = symbols.file_id
                 WHERE main.files.path = ?1
                   AND main.files.commit_sha = ?2
                   AND main.files.worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        // NOTE: parser failures are matched by path only; this statement does not
        // filter by commit or worktree.
        self.storage
            .connection()
            .execute("DELETE FROM parser_failures WHERE path = ?1", [&path])?;
        // FTS rows must go before the chunks, since they are found via the chunk ids.
        self.storage.connection().execute(
            "DELETE FROM chunk_fts
             WHERE rowid IN (
                 SELECT chunks.id FROM chunks
                 JOIN main.files ON main.files.id = chunks.file_id
                 WHERE main.files.path = ?1
                   AND main.files.commit_sha = ?2
                   AND main.files.worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        self.storage.connection().execute(
            "DELETE FROM chunks
             WHERE file_id IN (
                 SELECT id FROM main.files
                 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        self.storage.connection().execute(
            "DELETE FROM symbols
             WHERE file_id IN (
                 SELECT id FROM main.files
                 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
             )",
            params![path, commit_sha, worktree_id],
        )?;
        // The file row goes last: every statement above resolves its rows through it.
        self.storage.connection().execute(
            "DELETE FROM main.files WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3",
            params![path, commit_sha, worktree_id],
        )?;
        self.mark_fts_dirty()?;
        Ok(())
    }
2818
2819 fn ensure_fts_fresh(&self) -> anyhow::Result<()> {
2820 let content_revision = self.content_revision()?;
2821 let fts_source_revision = self.meta("fts_source_revision")?;
2822 if !self.fts_dirty()? && fts_source_revision.as_deref() == Some(content_revision.as_str()) {
2823 return Ok(());
2824 }
2825 self.rebuild_fts()?;
2826 let refreshed_revision = self.meta("fts_source_revision")?;
2827 if refreshed_revision.as_deref() != Some(content_revision.as_str()) {
2828 anyhow::bail!(
2829 "FTS freshness invariant failed: content_revision={content_revision}, fts_source_revision={}",
2830 refreshed_revision.unwrap_or_else(|| "<missing>".to_string())
2831 );
2832 }
2833 Ok(())
2834 }
2835
2836 fn fts_dirty(&self) -> anyhow::Result<bool> {
2837 Ok(self.meta("fts_dirty")?.as_deref() == Some("true"))
2838 }
2839
2840 fn file_row(&self, path: &Path) -> anyhow::Result<FileRow> {
2841 self.storage
2842 .connection()
2843 .query_row(
2844 "SELECT language, kind FROM files WHERE path = ?1",
2845 [path_string(path)],
2846 |row| {
2847 let language: String = row.get(0)?;
2848 let kind: String = row.get(1)?;
2849 Ok((language, kind))
2850 },
2851 )
2852 .map_err(Into::into)
2853 .and_then(|(language, kind)| {
2854 Ok(FileRow { language: language.parse()?, kind: kind.parse()? })
2855 })
2856 }
2857
2858 fn graph_reindex_files(&self) -> anyhow::Result<Vec<GraphReindexFile>> {
2859 let mut stmt = self
2860 .storage
2861 .connection()
2862 .prepare("SELECT id, path, language, kind FROM files ORDER BY path")?;
2863 let rows = stmt.query_map([], |row| {
2864 let language: String = row.get(2)?;
2865 let kind: String = row.get(3)?;
2866 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?, language, kind))
2867 })?;
2868 let mut files = Vec::new();
2869 for row in rows {
2870 let (id, path, language, kind) = row?;
2871 files.push(GraphReindexFile {
2872 id,
2873 path,
2874 language: language.parse()?,
2875 kind: kind.parse()?,
2876 });
2877 }
2878 Ok(files)
2879 }
2880
2881 fn indexed_files(&self) -> anyhow::Result<Vec<IndexedFile>> {
2882 let mut stmt =
2883 self.storage.connection().prepare("SELECT path, sha256 FROM files ORDER BY path")?;
2884 let rows =
2885 stmt.query_map([], |row| Ok(IndexedFile { path: row.get(0)?, sha256: row.get(1)? }))?;
2886 let mut files = Vec::new();
2887 for row in rows {
2888 files.push(row?);
2889 }
2890 Ok(files)
2891 }
2892
2893 fn indexed_file_count(&self) -> anyhow::Result<usize> {
2894 let count =
2895 self.storage
2896 .connection()
2897 .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2898 Ok(usize::try_from(count).unwrap_or(usize::MAX))
2899 }
2900
2901 fn content_revision(&self) -> anyhow::Result<String> {
2902 let value = self.storage.connection().query_row(
2903 "SELECT COALESCE(string_agg(path || ':' || sha256, ',' ORDER BY path), '') FROM files",
2904 [],
2905 |row| row.get::<_, String>(0),
2906 )?;
2907 Ok(hex_sha256(value.as_bytes()))
2908 }
2909}
2910
/// Parsed `language` and `kind` columns of one `files` row, as returned by `file_row`.
#[derive(Debug)]
struct FileRow {
    /// Language parsed from the stored `language` column.
    language: Language,
    /// Target kind parsed from the stored `kind` column.
    kind: TargetKind,
}
2916
/// One indexed file as loaded by `graph_reindex_files` for rebuilding graph edges.
#[derive(Debug)]
struct GraphReindexFile {
    /// Row id of the file in `files`.
    id: i64,
    /// Path as stored in the index; joined onto the source root to read the file.
    path: String,
    /// Language parsed from the stored `language` column.
    language: Language,
    /// Target kind parsed from the stored `kind` column.
    kind: TargetKind,
}
2924
/// Raw columns of one `files` row used by `graph_coverage` to report per-path status.
#[derive(Debug)]
struct GraphPathRow {
    /// Stored language name, reported unparsed.
    language: String,
    /// Content hash recorded at index time; compared with the file on disk.
    sha256: String,
    /// Stored `indexed_revision`; an empty string is reported as "no revision".
    indexed_revision: String,
}
2931
2932fn rank_docs_for_symbol(symbol: &crate::query::symbol::SymbolHit, hits: &mut [SearchHit]) {
2933 let source_module = module_stem(&symbol.path);
2934 let symbol_name = symbol.name.to_ascii_lowercase();
2935 let qualified_name = symbol.qualified_name.to_ascii_lowercase();
2936 hits.sort_by(|a, b| {
2937 let a_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, a);
2938 let b_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, b);
2939 a_rank
2940 .cmp(&b_rank)
2941 .then_with(|| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal))
2942 .then_with(|| a.path.cmp(&b.path))
2943 .then_with(|| a.start_line.cmp(&b.start_line))
2944 });
2945 for (idx, hit) in hits.iter_mut().enumerate() {
2946 hit.score = (10_000usize.saturating_sub(idx)) as f64;
2947 }
2948}
2949
2950fn docs_locality_rank(
2951 symbol: &crate::query::symbol::SymbolHit,
2952 source_module: &str,
2953 symbol_name: &str,
2954 qualified_name: &str,
2955 hit: &SearchHit,
2956) -> u8 {
2957 let path = hit.path.to_ascii_lowercase();
2958 let summary = hit.summary.to_ascii_lowercase();
2959 let hit_symbol = hit.symbol_path.as_deref().unwrap_or_default().to_ascii_lowercase();
2960 if hit.path == symbol.path && hit_symbol == symbol.qualified_name.to_ascii_lowercase() {
2961 return 0;
2962 }
2963 if hit.path == symbol.path {
2964 return 1;
2965 }
2966 if !source_module.is_empty()
2967 && path.contains(source_module)
2968 && (summary.contains(symbol_name) || hit_symbol.contains(symbol_name))
2969 {
2970 return 2;
2971 }
2972 if summary.contains(qualified_name) || hit_symbol.contains(qualified_name) {
2973 return 3;
2974 }
2975 if summary.contains(symbol_name) || hit_symbol.contains(symbol_name) {
2976 return 4;
2977 }
2978 if !source_module.is_empty() && path.contains(source_module) {
2979 return 5;
2980 }
2981 9
2982}
2983
/// Returns the ASCII-lowercased file stem of `path`, or an empty string when the path
/// has no file name or the stem is not valid UTF-8.
fn module_stem(path: &str) -> String {
    match Path::new(path).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_ascii_lowercase(),
        None => String::new(),
    }
}
2991
2992fn dedupe_search_hits(hits: &mut Vec<SearchHit>) {
2993 let mut seen = BTreeSet::new();
2994 hits.retain(|hit| seen.insert(hit.chunk_id));
2995}
2996
/// Collapses every run of whitespace in `text` to a single space, trims both ends, and
/// keeps at most the first 240 characters of the result.
fn bounded_summary(text: &str) -> String {
    let mut collapsed = String::new();
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    // Cut on a character boundary so multi-byte characters are never split.
    if let Some((cut, _)) = collapsed.char_indices().nth(240) {
        collapsed.truncate(cut);
    }
    collapsed
}
3000
/// Grouping key for logical symbols: physical symbols that agree on every field below
/// are treated as variants of the same logical symbol.
///
/// The derived ordering (field by field, in declaration order) is what orders the
/// groups when the key is used in a `BTreeMap`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct LogicalSymbolKey {
    /// Language name of the symbol.
    language: String,
    /// Path of the file that declares the symbol.
    path: String,
    /// Unqualified symbol name.
    name: String,
    /// Qualified symbol name.
    qualified_name: String,
    /// Symbol kind.
    kind: String,
    /// Signature text, if the symbol has one; differing signatures yield different keys.
    signature: Option<String>,
}
3013
3014impl LogicalSymbolKey {
3015 fn from(row: &LogicalSymbolMemberRow) -> Self {
3016 Self {
3017 language: row.language.clone(),
3018 path: row.path.clone(),
3019 name: row.name.clone(),
3020 qualified_name: row.qualified_name.clone(),
3021 kind: row.kind.clone(),
3022 signature: row.signature.clone(),
3023 }
3024 }
3025
3026 fn stable_id(&self) -> i64 {
3032 let canonical = format!(
3033 "{}\u{1f}{}\u{1f}{}\u{1f}{}\u{1f}{}\u{1f}{}",
3034 self.language,
3035 self.path,
3036 self.name,
3037 self.qualified_name,
3038 self.kind,
3039 self.signature.as_deref().unwrap_or(""),
3040 );
3041 let digest = Sha256::digest(canonical.as_bytes());
3042 let mut bytes = [0u8; 8];
3043 bytes.copy_from_slice(&digest[..8]);
3044 (u64::from_be_bytes(bytes) >> 1) as i64
3045 }
3046}
3047
/// One physical symbol as read back from the index while rebuilding logical symbols.
#[derive(Debug, Clone)]
struct LogicalSymbolMemberRow {
    /// Row id of the symbol in `symbols`.
    symbol_id: i64,
    /// Path of the file that declares the symbol.
    path: String,
    /// Language name stored on the symbol.
    language: String,
    /// Unqualified symbol name.
    name: String,
    /// Qualified symbol name.
    qualified_name: String,
    /// Symbol kind.
    kind: String,
    /// Signature text, if any; stored on the member row only as a hash.
    signature: Option<String>,
    /// Line of the symbol's start byte, derived from the enclosing chunk.
    start_line: i64,
    /// Line of the symbol's end byte, derived from the enclosing chunk.
    end_line: i64,
}
3060
/// Converts an absolute byte offset into a line number, using the chunk that contains it.
///
/// * `text`: the chunk's text.
/// * `chunk_start_byte`: absolute byte offset at which `text` starts.
/// * `chunk_start_line`: line number of the first line of `text`.
/// * `byte`: absolute byte offset to locate.
///
/// Offsets at or before the chunk start map to `chunk_start_line` (but never below 1).
/// Offsets past the end of the chunk are clamped to the end of `text`. Otherwise the
/// result is `chunk_start_line` plus the number of newlines before the offset.
///
/// Newlines are counted on the raw bytes, so an offset that falls inside a multi-byte
/// UTF-8 character is handled instead of panicking on a `str` slice boundary.
fn symbol_line_for_byte(
    text: &str,
    chunk_start_byte: usize,
    chunk_start_line: i64,
    byte: usize,
) -> i64 {
    if byte <= chunk_start_byte {
        return chunk_start_line.max(1);
    }
    // `byte > chunk_start_byte` here, so the subtraction cannot underflow.
    let local = (byte - chunk_start_byte).min(text.len());
    let newlines = text.as_bytes()[..local].iter().filter(|&&value| value == b'\n').count();
    chunk_start_line.saturating_add(i64::try_from(newlines).unwrap_or(0))
}
3074
3075fn graph_only_reason(edge: &crate::query::graph::GraphHop, current_line: Option<&str>) -> String {
3076 let Some(line) = current_line else {
3077 return "missing_current_source_line".to_string();
3078 };
3079 if edge
3080 .target_qualified_name
3081 .as_deref()
3082 .is_some_and(|qualified| !qualified.is_empty() && line.contains(qualified))
3083 {
3084 return "qualified_call_pattern_mismatch".to_string();
3085 }
3086 if edge.target.as_deref().is_some_and(|target| !target.is_empty() && line.contains(target)) {
3087 return "imported_or_unqualified_call".to_string();
3088 }
3089 if edge
3090 .evidence
3091 .as_deref()
3092 .is_some_and(|evidence| !evidence.is_empty() && line.contains(evidence.trim()))
3093 {
3094 return "regex_too_narrow".to_string();
3095 }
3096 "stale_or_overbroad_graph_edge".to_string()
3097}
3098
3099fn is_likely_false_positive_graph_only(
3100 edge: &crate::query::graph::GraphHop,
3101 graph_only: &crate::query::graph::GraphOnlyEdge,
3102) -> bool {
3103 if graph_only.likely_reason == "stale_or_overbroad_graph_edge" {
3104 return true;
3105 }
3106 edge.resolution == "target_name_fallback"
3107 || edge.confidence == "name_only"
3108 || edge.confidence == "ambiguous"
3109 || !edge.verified_target_symbol
3110}
3111
3112fn classify_text_only_hit(
3113 path: &str,
3114 text: &str,
3115 parser_failure_paths: &BTreeSet<String>,
3116) -> &'static str {
3117 if parser_failure_paths.contains(path) {
3118 return "parser_failure";
3119 }
3120 if is_generated_path(path) {
3121 return "generated_text_mention";
3122 }
3123 let trimmed = text.trim_start();
3124 if is_comment_like_text(trimmed) {
3125 return "comment_text_mention";
3126 }
3127 if is_import_or_declaration_text(trimmed) {
3128 return "declaration_text_mention";
3129 }
3130 if is_test_like_path(path) && is_test_scaffolding_text(trimmed) {
3131 return "test_scaffolding_text_mention";
3132 }
3133 "parser_call_extraction"
3134}
3135
/// Returns true for the text-only hit labels that count as likely parser gaps:
/// `parser_call_extraction` and `parser_failure`.
fn is_likely_parser_gap_kind(kind: &str) -> bool {
    kind == "parser_call_extraction" || kind == "parser_failure"
}
3139
/// Heuristic: does `path` look like generated output?
///
/// Matches paths containing a `/generated/` or `/generated-web/` segment and
/// TypeScript declaration files (`.d.ts`).
fn is_generated_path(path: &str) -> bool {
    const GENERATED_SEGMENTS: [&str; 2] = ["/generated/", "/generated-web/"];
    // `_bg.wasm.d.ts` needs no check of its own: every such path ends in `.d.ts`.
    path.ends_with(".d.ts") || GENERATED_SEGMENTS.iter().any(|segment| path.contains(*segment))
}
3146
/// Heuristic: does the (already left-trimmed) line start like a comment?
///
/// Recognised openers are `//`, `/*`, `*` and `#`. Because `#` counts, lines
/// such as `#[derive(..)]` are reported as comment-like too.
fn is_comment_like_text(text: &str) -> bool {
    // A leading `*/` is already covered by the bare `*` opener.
    const OPENERS: [&str; 4] = ["//", "/*", "*", "#"];
    OPENERS.iter().any(|opener| text.starts_with(*opener))
}
3154
/// Heuristic: does the (already left-trimmed) line open with an import or a
/// type-level declaration keyword?
///
/// Each keyword must be followed by a space, so `importFoo` or `typeof` do not match.
fn is_import_or_declaration_text(text: &str) -> bool {
    const KEYWORDS: [&str; 6] =
        ["import ", "export type ", "export interface ", "type ", "interface ", "declare "];
    KEYWORDS.iter().any(|keyword| text.starts_with(*keyword))
}
3163
/// Heuristic: does the line contain typical test-framework scaffolding?
///
/// These are plain substring checks, so short markers also match inside longer
/// identifiers (for example `it(` matches `emit(`).
fn is_test_scaffolding_text(text: &str) -> bool {
    const MARKERS: [&str; 8] =
        [".mock", "jest.", "jest<", "expect(", "toHaveBeen", "describe(", "it(", "test("];
    MARKERS.iter().any(|marker| text.contains(*marker))
}
3174
3175fn recommended_graph_text_fallback(
3176 parser_gaps: &[crate::query::graph::TextOnlyHit],
3177 graph_only_edges: &[crate::query::graph::GraphOnlyEdge],
3178) -> String {
3179 match (parser_gaps.is_empty(), graph_only_edges.is_empty()) {
3180 (false, false) => "both",
3181 (false, true) => "text",
3182 (true, false) => "graph",
3183 (true, true) => "none",
3184 }
3185 .to_string()
3186}
3187
/// Classifies how a search `pattern` relates to `symbol_name`.
///
/// * `"identifier_or_call"` – the pattern uses boundary syntax (`\b`, `\W`, `[^`)
///   or contains the symbol followed by `(` or `\(`.
/// * `"substring_identifier"` – the pattern merely contains the symbol name.
/// * `"regex"` – anything else, including an empty `symbol_name`.
fn compare_pattern_match_mode(pattern: &str, symbol_name: &str) -> String {
    let mode = if symbol_name.is_empty() {
        "regex"
    } else {
        let uses_boundary_syntax =
            ["\\b", "\\W", "[^"].iter().any(|token| pattern.contains(*token));
        let names_a_call = pattern.contains(&format!("{symbol_name}\\("))
            || pattern.contains(&format!("{symbol_name}("));
        if uses_boundary_syntax || names_a_call {
            "identifier_or_call"
        } else if pattern.contains(symbol_name) {
            "substring_identifier"
        } else {
            "regex"
        }
    };
    mode.to_string()
}
3207
/// Heuristic: does `path` point at test code?
///
/// The check is ASCII-case-insensitive and is true when either
/// * the file name ends with a known test suffix (`_test.rs`, `.test.ts`,
///   `.test.tsx`, `.spec.ts`, `.spec.tsx`), or
/// * any directory component is `test`, `tests` or `__tests__`.
///
/// Directory components are compared whole, so a leading directory such as
/// `tests/integration.rs` (a repo-relative path with no leading `/`) is
/// recognised as well as nested ones like `crate/tests/it.rs`.
fn is_test_like_path(path: &str) -> bool {
    const TEST_DIRECTORIES: [&str; 3] = ["test", "tests", "__tests__"];
    const TEST_SUFFIXES: [&str; 5] =
        ["_test.rs", ".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx"];

    let lower = path.to_ascii_lowercase();
    if TEST_SUFFIXES.iter().any(|suffix| lower.ends_with(*suffix)) {
        return true;
    }

    let mut components = lower.split('/');
    // The final component is the file name, not a directory.
    components.next_back();
    components.any(|component| TEST_DIRECTORIES.contains(&component))
}
3219
/// A file path paired with a SHA-256 digest string.
///
/// NOTE(review): nothing in this section constructs or reads this struct;
/// presumably it mirrors the `path`/`sha256` columns of the `files` table —
/// confirm against its users.
#[derive(Debug)]
struct IndexedFile {
    /// File path.
    path: String,
    /// Hex SHA-256 of the file contents (presumably — TODO confirm).
    sha256: String,
}
3225
/// One file selected for indexing, together with the target settings that matched it.
///
/// Built by `collect_index_files` and `collect_changed_index_files`, both of
/// which leave `commit_sha` and `worktree_id` empty.
#[derive(Debug, Clone)]
struct IndexFile {
    /// Path used to read the file from disk.
    full_path: PathBuf,
    /// Path relative to the configured root.
    relative_path: PathBuf,
    /// Language of the target that matched the file.
    language: Language,
    /// Kind of the target that matched the file.
    kind: TargetKind,
    /// Commit the file is scoped to; empty when not set.
    commit_sha: String,
    /// Worktree the file is scoped to; empty when not set.
    worktree_id: String,
}
3235
/// Scope of an indexed file: a commit or a worktree.
///
/// The constructors fill exactly one of the two fields and leave the other as
/// an empty string.
#[derive(Debug, Clone)]
struct FileScope {
    /// Commit SHA; empty for a worktree scope.
    commit_sha: String,
    /// Worktree identifier; empty for a commit scope.
    worktree_id: String,
}

impl FileScope {
    /// Scope bound to `commit_sha`, with no worktree.
    fn commit(commit_sha: String) -> Self {
        let worktree_id = String::new();
        FileScope { commit_sha, worktree_id }
    }

    /// Scope bound to `worktree_id`, with no commit.
    fn worktree(worktree_id: String) -> Self {
        let commit_sha = String::new();
        FileScope { commit_sha, worktree_id }
    }
}
3251
/// Outcome of preparing one file for indexing (see `prepare_index_file`).
#[derive(Debug)]
struct PreparedIndexFile {
    /// The file that was prepared.
    file: IndexFile,
    /// Prepared content, or the error raised while reading/processing the file.
    prepared: anyhow::Result<PreparedIndexContent>,
}
3257
/// Everything derived from a file's contents by `prepare_index_content`.
#[derive(Debug)]
struct PreparedIndexContent {
    /// File modification time in milliseconds since the Unix epoch.
    modified_at_ms: i64,
    /// Full file text.
    text: String,
    /// Hex SHA-256 of `text`.
    sha256: String,
    /// Chunks produced by the chunker for this file.
    chunks: Vec<Chunk>,
    /// Symbols extracted from the file; empty for generated or oversized files.
    symbols: Vec<Symbol>,
    /// Parser error message, if the parse check reported or raised one.
    parser_failure: Option<String>,
}
3267
/// Result of comparing files on disk with the stored index (see `discovery_plan`).
#[derive(Debug)]
struct DiscoveryPlan {
    /// Files that need (re)indexing: the unindexed ones plus those whose hash changed.
    files: Vec<IndexFile>,
    /// Indexed paths that were not found on disk.
    deleted: BTreeSet<PathBuf>,
    /// Files on disk that have no index entry yet.
    unindexed: Vec<IndexFile>,
    /// Relative paths whose current hash differs from the indexed hash.
    changed: Vec<PathBuf>,
    /// Number of files discovered on disk.
    discovered_files: usize,
    /// Discovered paths plus deleted paths minus unindexed files.
    indexed_files: usize,
}
3277
/// Paths reported by git status, split by whether they still exist on disk
/// (filled by `git_changed_paths`). A path is kept in at most one of the two sets.
#[derive(Debug, Default)]
struct GitChangedPaths {
    /// Reported paths that exist on disk.
    changed: BTreeSet<PathBuf>,
    /// Reported paths that no longer exist on disk.
    deleted: BTreeSet<PathBuf>,
}
3283
3284fn collect_index_files(config: &Config) -> anyhow::Result<Vec<IndexFile>> {
3285 let mut targets = config.targets.iter().collect::<Vec<_>>();
3286 targets.sort_by_key(|target| match target.kind {
3287 TargetKind::Generated => 0,
3288 TargetKind::Tests => 1,
3289 TargetKind::Docs => 2,
3290 TargetKind::Source => 3,
3291 });
3292 let mut seen = BTreeSet::new();
3293 let mut files = Vec::new();
3294
3295 for target in targets {
3296 for file in walker::walk_target(&config.root, target)? {
3297 let relative_path = file.strip_prefix(&config.root)?.to_path_buf();
3298 if !seen.insert(relative_path.clone()) {
3299 continue;
3300 }
3301 files.push(IndexFile {
3302 full_path: file,
3303 relative_path,
3304 language: target.language,
3305 kind: target.kind,
3306 commit_sha: String::new(),
3307 worktree_id: String::new(),
3308 });
3309 }
3310 }
3311
3312 Ok(files)
3313}
3314
3315fn collect_changed_index_files(
3316 config: &Config,
3317 changes: &GitChangedPaths,
3318) -> anyhow::Result<Vec<IndexFile>> {
3319 let mut files = Vec::new();
3320 for relative_path in &changes.changed {
3321 let full_path = config.root.join(relative_path);
3322 if !full_path.is_file() {
3323 continue;
3324 }
3325 let Some((language, kind)) = target_for_path(config, relative_path) else {
3326 continue;
3327 };
3328 files.push(IndexFile {
3329 full_path,
3330 relative_path: relative_path.clone(),
3331 language,
3332 kind,
3333 commit_sha: String::new(),
3334 worktree_id: String::new(),
3335 });
3336 }
3337 Ok(files)
3338}
3339
3340fn spawn_git_history_prepare(
3341 root: &Path,
3342) -> JoinHandle<anyhow::Result<git_history::PreparedGitHistory>> {
3343 let root = root.to_path_buf();
3344 thread::spawn(move || git_history::prepare(&root))
3345}
3346
3347fn join_git_history_prepare(
3348 handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
3349) -> anyhow::Result<git_history::PreparedGitHistory> {
3350 handle.join().map_err(|_| anyhow::anyhow!("git history preparation panicked"))?
3351}
3352
3353fn prepare_index_file(file: &IndexFile) -> PreparedIndexFile {
3354 PreparedIndexFile { file: file.clone(), prepared: prepare_index_content(file) }
3355}
3356
/// Prepares all `files` in parallel while forwarding progress events to `progress`.
///
/// The preparation runs on a scoped thread that drives a rayon parallel
/// iterator; the calling thread stays in the receive loop and invokes
/// `progress` itself, so the callback is only ever called from the caller's
/// thread. Only the events selected by `should_report_file_progress` are sent.
///
/// # Errors
/// Returns an error if the preparation thread panicked. Per-file failures are
/// not errors here; they are carried inside each `PreparedIndexFile`.
fn prepare_files_with_progress<F>(
    files: &[IndexFile],
    progress: &mut F,
) -> anyhow::Result<Vec<PreparedIndexFile>>
where
    F: FnMut(IndexProgress),
{
    /// Progress message sent from worker threads to the calling thread.
    #[derive(Debug)]
    struct PreparedProgress {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    }

    let total = files.len();
    let prepared = thread::scope(|scope| {
        let (tx, rx) = mpsc::channel();
        // Counts finished files across all rayon workers.
        let completed = AtomicUsize::new(0);
        // `move` transfers `tx` into the worker closure; it is dropped when the
        // closure finishes, which is what ends the receive loop below.
        let handle = scope.spawn(move || {
            files
                .par_iter()
                .map(|file| {
                    let prepared = prepare_index_file(file);
                    let current = completed.fetch_add(1, Ordering::Relaxed) + 1;
                    if should_report_file_progress(current, total) {
                        // A failed send is ignored: progress is best-effort.
                        let _ = tx.send(PreparedProgress {
                            current,
                            total,
                            path: file.relative_path.clone(),
                            language: file.language,
                            kind: file.kind,
                        });
                    }
                    prepared
                })
                .collect::<Vec<_>>()
        });

        // Runs until every sender is gone, i.e. until the worker closure returns.
        for event in rx {
            progress(IndexProgress::PreparingFile {
                current: event.current,
                total: event.total,
                path: event.path,
                language: event.language,
                kind: event.kind,
            });
        }

        handle.join().map_err(|_| anyhow::anyhow!("parallel file preparation panicked"))
    })?;
    Ok(prepared)
}
3411
/// Decides whether progress for file number `current` of `total` is worth reporting.
///
/// Reports the first file, the last file, and every file at which the completed
/// tenth (`current * 10 / total`) advances. Never reports when `total` is zero.
fn should_report_file_progress(current: usize, total: usize) -> bool {
    if total == 0 {
        return false;
    }
    if current == 1 || current == total {
        return true;
    }
    let completed_tenths = |count: usize| count.saturating_mul(10) / total;
    completed_tenths(current) != completed_tenths(current.saturating_sub(1))
}
3421
3422fn prepare_index_content(file: &IndexFile) -> anyhow::Result<PreparedIndexContent> {
3423 let text = fs::read_to_string(&file.full_path)?;
3424 let modified_at_ms = file_metadata_ms(&file.full_path)?;
3425 let sha256 = hex_sha256(text.as_bytes());
3426 let parser_failure =
3427 if file.language != Language::Markdown && file.kind != TargetKind::Generated {
3428 if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3429 None
3430 } else {
3431 parser::parse_error(&file.relative_path, file.language, &text)
3432 .unwrap_or_else(|err| Some(err.to_string()))
3433 }
3434 } else {
3435 None
3436 };
3437 let chunks = if file.kind == TargetKind::Generated {
3438 chunker::generated_chunks_for_file(&file.relative_path, &text)
3439 } else {
3440 chunker::chunks_for_file(&file.relative_path, file.language, &text)
3441 };
3442 let symbols =
3443 if file.kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3444 Vec::new()
3445 } else {
3446 symbols::symbols_for_file(&file.relative_path, file.language, &text)
3447 };
3448 Ok(PreparedIndexContent { modified_at_ms, text, sha256, chunks, symbols, parser_failure })
3449}
3450
3451fn discovery_plan(conn: &rusqlite::Connection, config: &Config) -> anyhow::Result<DiscoveryPlan> {
3452 let discovered = collect_index_files(config)?;
3453 let mut indexed = indexed_file_map(conn)?;
3454 let mut current_paths = BTreeSet::new();
3455 let mut files = Vec::new();
3456 let mut unindexed = Vec::new();
3457 let mut changed = Vec::new();
3458 let discovered_files = discovered.len();
3459 let hashed = discovered
3460 .par_iter()
3461 .map(|file| -> anyhow::Result<(IndexFile, String)> {
3462 let text = fs::read(&file.full_path)?;
3463 Ok((file.clone(), hex_sha256(&text)))
3464 })
3465 .collect::<Vec<_>>();
3466
3467 for hashed_file in hashed {
3468 let (file, current_hash) = hashed_file?;
3469 let relative = path_string(&file.relative_path);
3470 current_paths.insert(file.relative_path.clone());
3471 let Some(indexed_hash) = indexed.remove(&relative) else {
3472 unindexed.push(file.clone());
3473 files.push(file);
3474 continue;
3475 };
3476 if current_hash != indexed_hash {
3477 changed.push(file.relative_path.clone());
3478 files.push(file);
3479 }
3480 }
3481
3482 let deleted = indexed
3483 .into_keys()
3484 .map(PathBuf::from)
3485 .filter(|path| !current_paths.contains(path))
3486 .collect::<BTreeSet<_>>();
3487
3488 Ok(DiscoveryPlan {
3489 discovered_files,
3490 indexed_files: current_paths
3491 .len()
3492 .saturating_add(deleted.len())
3493 .saturating_sub(unindexed.len()),
3494 files,
3495 deleted,
3496 unindexed,
3497 changed,
3498 })
3499}
3500
3501fn indexed_file_map(conn: &rusqlite::Connection) -> anyhow::Result<BTreeMap<String, String>> {
3502 let mut stmt = conn.prepare("SELECT path, sha256 FROM files ORDER BY path")?;
3503 let rows =
3504 stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?;
3505 let mut files = BTreeMap::new();
3506 for row in rows {
3507 let (path, sha256) = row?;
3508 files.insert(path, sha256);
3509 }
3510 Ok(files)
3511}
3512
3513pub(crate) fn target_for_path(
3514 config: &Config,
3515 relative_path: &Path,
3516) -> Option<(Language, TargetKind)> {
3517 let relative = path_string(relative_path);
3518 let language = Language::from_path(relative_path)?;
3519 let mut targets = config.targets.iter().collect::<Vec<_>>();
3520 targets.sort_by_key(|target| match target.kind {
3521 TargetKind::Generated => 0,
3522 TargetKind::Tests => 1,
3523 TargetKind::Docs => 2,
3524 TargetKind::Source => 3,
3525 });
3526 targets.into_iter().find_map(|target| {
3527 if target.language != language {
3528 return None;
3529 }
3530 if !target.directories.iter().any(|directory| {
3531 directory.as_os_str().is_empty()
3532 || directory == Path::new(".")
3533 || relative_path.starts_with(directory)
3534 }) {
3535 return None;
3536 }
3537 if target.exclude.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3538 return None;
3539 }
3540 if !target.include.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3541 return None;
3542 }
3543 Some((target.language, target.kind))
3544 })
3545}
3546
/// Asks git which paths under `root` differ, and sorts them into changed and deleted.
///
/// Each reported location is converted to a path relative to `root`; locations
/// outside `root` are skipped. A path that exists on disk is recorded as
/// changed unless it was already recorded as deleted; a path that does not
/// exist is recorded as deleted and removed from the changed set.
///
/// # Errors
/// Fails if no repository is found from `root`, if the repository has no
/// worktree, or if the status iteration reports an error.
fn git_changed_paths(root: &Path) -> anyhow::Result<GitChangedPaths> {
    let repo = gix::discover(root)?;
    let worktree_root = repo
        .workdir()
        .ok_or_else(|| anyhow::anyhow!("git repository has no worktree"))?
        .to_path_buf();
    // Pathspec derived from where `root` sits inside the worktree.
    let pathspec = config_root_pathspec(&worktree_root, root);
    let mut paths = GitChangedPaths::default();

    for item in repo
        .status(gix::progress::Discard)?
        .untracked_files(UntrackedFiles::Files)
        .tree_index_track_renames(tree_index::TrackRenames::Disabled)
        .into_iter([pathspec])?
    {
        let item = item?;
        let Some(path) = repo_relative_path_to_config_path(&worktree_root, root, item.location())
        else {
            // The location is not under `root`.
            continue;
        };
        if root.join(&path).exists() {
            // A deletion already recorded for this path takes precedence.
            if !paths.deleted.contains(&path) {
                paths.changed.insert(path);
            }
        } else {
            // Keep the two sets disjoint: a missing path is only ever "deleted".
            paths.changed.remove(&path);
            paths.deleted.insert(path);
        }
    }

    Ok(paths)
}
3579
3580fn repo_relative_path_to_config_path(
3581 worktree_root: &Path,
3582 config_root: &Path,
3583 repo_relative_path: &gix::bstr::BStr,
3584) -> Option<PathBuf> {
3585 let path = PathBuf::from(repo_relative_path.to_str_lossy().as_ref());
3586 worktree_root.join(path).strip_prefix(config_root).ok().map(Path::to_path_buf)
3587}
3588
3589fn config_root_pathspec(worktree_root: &Path, config_root: &Path) -> BString {
3590 let relative = config_root.strip_prefix(worktree_root).unwrap_or_else(|_| Path::new(""));
3591 let relative = path_string(relative);
3592 if relative.is_empty() || relative == "." {
3593 BString::from("*")
3594 } else {
3595 BString::from(format!("{relative}/**"))
3596 }
3597}
3598
/// Minimal glob matcher for the include/exclude patterns used by `target_for_path`.
///
/// Supported shapes:
/// * `**/*.ext` – the path ends with `.ext`.
/// * `dir/**` – the path is `dir` or lies beneath it. The match respects the
///   directory boundary, so `src/**` does not match `src-old/lib.rs`.
/// * `**/dir/**` – some directory component sequence `dir` appears anywhere in
///   the path, including at its start.
/// * anything else – exact equality, or the pattern with leading/trailing `*`
///   removed occurs as a substring of the path.
fn matches_simple_pattern(path: &str, pattern: &str) -> bool {
    // True when `path` is `directory` itself or a descendant of it.
    fn is_within(path: &str, directory: &str) -> bool {
        path.strip_prefix(directory).is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
    }

    if let Some(extension) = pattern.strip_prefix("**/*.") {
        return path.ends_with(&format!(".{extension}"));
    }
    if let Some(prefix) = pattern.strip_suffix("/**") {
        let prefix = prefix.trim_end_matches('/');
        if prefix.is_empty() {
            // `/**` constrains nothing.
            return true;
        }
        return match prefix.strip_prefix("**/") {
            // `**/dir/**`: try `dir` at the start and after every separator.
            Some(directory) => {
                is_within(path, directory)
                    || path
                        .match_indices('/')
                        .any(|(slash, _)| is_within(&path[slash + 1..], directory))
            }
            None => is_within(path, prefix),
        };
    }
    path == pattern || path.contains(pattern.trim_matches('*'))
}
3608
3609fn meta_for(conn: &rusqlite::Connection, key: &str) -> anyhow::Result<Option<String>> {
3610 Ok(conn
3611 .query_row("SELECT value FROM index_meta WHERE key = ?1", [key], |row| row.get(0))
3612 .optional()?)
3613}
3614
/// Runs `git <args>` in `root` and returns its trimmed standard output.
///
/// Returns `None` when the process cannot be started or exits unsuccessfully.
/// Output that is not valid UTF-8 is converted lossily.
fn git_output(root: &Path, args: &[&str]) -> Option<String> {
    let mut command = Command::new("git");
    command.args(args).current_dir(root);
    let output = command.output().ok()?;
    output.status.success().then(|| String::from_utf8_lossy(&output.stdout).trim().to_string())
}
3622
3623fn resolve_git_context(root: &Path) -> (String, String) {
3624 let commit_sha =
3625 git_output(root, &["rev-parse", "HEAD"]).map(|s| s.trim().to_string()).unwrap_or_default();
3626 let worktree_id = root.to_string_lossy().trim_end_matches('/').to_string();
3627 (commit_sha, worktree_id)
3628}
3629
3630fn live_worktree_contexts(root: &Path) -> (Vec<String>, Vec<String>) {
3634 let mut commits = Vec::new();
3635 let mut worktrees = Vec::new();
3636 let Some(output) = git_output(root, &["worktree", "list", "--porcelain"]) else {
3637 return (commits, worktrees);
3638 };
3639 for line in output.lines() {
3640 if let Some(path) = line.strip_prefix("worktree ") {
3641 worktrees.push(path.trim().trim_end_matches('/').to_string());
3642 } else if let Some(sha) = line.strip_prefix("HEAD ") {
3643 commits.push(sha.trim().to_string());
3644 }
3645 }
3646 (commits, worktrees)
3647}
3648
3649fn table_row_count(conn: &rusqlite::Connection, table: &str) -> anyhow::Result<u64> {
3650 let count = conn
3652 .query_row(&format!("SELECT COUNT(*) FROM main.{table}"), [], |row| row.get::<_, i64>(0))?;
3653 Ok(u64::try_from(count).unwrap_or(0))
3654}
3655
3656fn file_metadata_ms(path: &Path) -> anyhow::Result<i64> {
3657 let modified = fs::metadata(path)?.modified()?;
3658 Ok(duration_ms(modified.duration_since(UNIX_EPOCH)?))
3659}
3660
3661fn now_ms() -> i64 {
3662 duration_ms(SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default())
3663}
3664
/// Converts a duration to whole milliseconds, saturating at `i64::MAX`.
fn duration_ms(duration: std::time::Duration) -> i64 {
    match i64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => i64::MAX,
    }
}
3668
3669fn hex_sha256(bytes: &[u8]) -> String {
3670 let hash = Sha256::digest(bytes);
3671 let mut out = String::with_capacity(hash.len() * 2);
3672 for byte in hash {
3673 use std::fmt::Write as _;
3674 let _ = write!(out, "{byte:02x}");
3675 }
3676 out
3677}
3678
/// Renders a path as text with every backslash turned into a forward slash.
///
/// Non-UTF-8 parts of the path are converted lossily.
fn path_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.chars().map(|ch| if ch == '\\' { '/' } else { ch }).collect()
}
3682
3683#[cfg(test)]
3684mod schema_bootstrap_tests {
3685 use std::sync::atomic::{AtomicU64, Ordering};
3686
3687 use super::*;
3688 use crate::config::ResolvedTarget;
3689
3690 static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
3691
    /// A rebuild over a target directory with no files must still create the
    /// database file and the full schema.
    ///
    /// NOTE(review): `table_count`, `file_columns`, `chunk_columns`,
    /// `table_columns` and `indexed_revision_count` are defined outside this
    /// section; `table_count` presumably counts schema objects by name rather
    /// than rows — confirm.
    #[test]
    fn rebuild_bootstraps_sqlite_schema_for_empty_target_root() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        let docs = root.join("docs");
        fs::create_dir_all(&docs).unwrap();

        // A single Markdown docs target pointing at the empty `docs` directory.
        let config = Config {
            root: root.clone(),
            database: root.join(".rag-rat/index.sqlite"),
            targets: vec![ResolvedTarget {
                name: "markdown".to_string(),
                language: Language::Markdown,
                directories: vec![PathBuf::from("docs")],
                include: vec!["**/*.md".to_string()],
                exclude: Vec::new(),
                kind: TargetKind::Docs,
            }],
            local_ai: Default::default(),
            watch: Default::default(),
        };

        let db = IndexDatabase::rebuild(&config).unwrap();
        assert!(config.database.exists());
        // Core, git-history and local-AI tables.
        assert_eq!(table_count(&db, "files"), 1);
        assert_eq!(table_count(&db, "chunks"), 1);
        assert_eq!(table_count(&db, "symbols"), 1);
        assert_eq!(table_count(&db, "parser_failures"), 1);
        assert_eq!(table_count(&db, "index_meta"), 1);
        assert_eq!(table_count(&db, "chunk_fts"), 1);
        assert_eq!(table_count(&db, "git_commits"), 1);
        assert_eq!(table_count(&db, "git_file_changes"), 1);
        assert_eq!(table_count(&db, "git_chunk_blame"), 1);
        assert_eq!(table_count(&db, "commit_fts"), 1);
        assert_eq!(table_count(&db, "ai_models"), 1);
        assert_eq!(table_count(&db, "chunk_embeddings"), 1);
        assert_eq!(table_count(&db, "chunk_summaries"), 1);
        assert_eq!(table_count(&db, "reconcile_meta"), 1);
        assert_eq!(table_count(&db, "reconcile_attempts"), 1);
        // Columns expected on `files` and `chunks`.
        assert!(file_columns(&db).contains(&"indexed_revision".to_string()));
        assert_eq!(indexed_revision_count(&db), 0);
        assert!(chunk_columns(&db).contains(&"anchor_version".to_string()));
        assert!(chunk_columns(&db).contains(&"normalized_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"start_boundary_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"end_boundary_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"source_revision".to_string()));
        // Columns expected on `chunk_embeddings`.
        let embedding_columns = table_columns(&db, "chunk_embeddings");
        assert!(embedding_columns.contains(&"model_version".to_string()));
        assert!(embedding_columns.contains(&"input_hash".to_string()));
        assert!(embedding_columns.contains(&"embedding_text_version".to_string()));
        assert!(embedding_columns.contains(&"embedding_policy".to_string()));
        assert!(embedding_columns.contains(&"embedding_priority".to_string()));
        assert!(embedding_columns.contains(&"input_chars".to_string()));
        assert!(embedding_columns.contains(&"input_truncated".to_string()));
        assert!(embedding_columns.contains(&"attempt_count".to_string()));
        assert!(embedding_columns.contains(&"next_retry_after_ms".to_string()));
        assert!(embedding_columns.contains(&"computed_at_ms".to_string()));
        // Columns expected on `edges`.
        let edge_columns = table_columns(&db, "edges");
        assert!(edge_columns.contains(&"source_start_line".to_string()));
        assert!(edge_columns.contains(&"source_end_line".to_string()));
        assert!(edge_columns.contains(&"source_start_byte".to_string()));
        assert!(edge_columns.contains(&"source_end_byte".to_string()));
        assert!(edge_columns.contains(&"target_start_line".to_string()));
        assert!(edge_columns.contains(&"target_end_line".to_string()));
        assert!(edge_columns.contains(&"target_qualified_name".to_string()));
        assert!(edge_columns.contains(&"evidence".to_string()));
        assert!(edge_columns.contains(&"receiver_hint".to_string()));
        assert!(edge_columns.contains(&"resolution".to_string()));
        // Columns expected on the logical-symbol, GitHub and symbol-fact tables.
        let logical_columns = table_columns(&db, "logical_symbols");
        assert!(logical_columns.contains(&"qualified_name".to_string()));
        assert!(logical_columns.contains(&"variant_count".to_string()));
        let member_columns = table_columns(&db, "logical_symbol_members");
        assert!(member_columns.contains(&"symbol_id".to_string()));
        assert!(member_columns.contains(&"signature_hash".to_string()));
        let github_ref_sync_columns = table_columns(&db, "github_ref_sync");
        assert!(github_ref_sync_columns.contains(&"status".to_string()));
        assert!(github_ref_sync_columns.contains(&"last_error".to_string()));
        let symbol_fact_columns = table_columns(&db, "symbol_facts");
        assert!(symbol_fact_columns.contains(&"fact_kind".to_string()));
        assert!(symbol_fact_columns.contains(&"fact_value".to_string()));
        // The recorded schema version must be the latest one.
        assert_eq!(
            db.status(&config.database).unwrap().schema.current_version,
            schema::LATEST_SCHEMA_VERSION
        );

        fs::remove_dir_all(root).unwrap();
    }
3779
    /// A rebuild over one Rust source file must emit at least one
    /// `PreparingFile` and one `IndexingFile` progress event.
    ///
    /// NOTE(review): `source_config` is defined outside this section.
    #[test]
    fn rebuild_reports_file_preparation_progress() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn exported() {}\n").unwrap();

        let config = source_config(root.clone(), Language::Rust);
        // Record every progress event the rebuild reports.
        let mut events = Vec::new();
        IndexDatabase::rebuild_with_progress(&config, |progress| events.push(progress)).unwrap();

        assert!(
            events.iter().any(|event| matches!(event, IndexProgress::PreparingFile { .. })),
            "missing preparing progress event: {events:?}"
        );
        assert!(
            events.iter().any(|event| matches!(event, IndexProgress::IndexingFile { .. })),
            "missing indexing progress event: {events:?}"
        );

        fs::remove_dir_all(root).unwrap();
    }
3802
3803 #[test]
3804 fn file_progress_reports_first_final_and_decile_boundaries() {
3805 let reported = (1..=100)
3806 .filter(|current| should_report_file_progress(*current, 100))
3807 .collect::<Vec<_>>();
3808 assert_eq!(reported, vec![1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]);
3809 }
3810
    /// A database whose `schema_version` table was dropped must be reported as
    /// `Older`, refuse to open with a hint to run the migration, and open again
    /// once migrated.
    #[test]
    fn compatible_open_requires_recorded_schema_version() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join(".rag-rat")).unwrap();
        let database = root.join(".rag-rat/index.sqlite");
        IndexDatabase::migrate(&database).unwrap();
        // Remove the version bookkeeping from an otherwise migrated database.
        let conn = rusqlite::Connection::open(&database).unwrap();
        conn.execute_batch("DROP TABLE schema_version;").unwrap();
        drop(conn);

        let status = IndexDatabase::migration_check(&database).unwrap();
        assert_eq!(status.state, schema::SchemaState::Older);
        let err = IndexDatabase::open(&database).unwrap_err().to_string();
        assert!(err.contains("run `rag-rat migrate`"), "{err}");

        // Migrating again restores a compatible, openable database.
        let migrated = IndexDatabase::migrate(&database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        IndexDatabase::open(&database).unwrap();

        fs::remove_dir_all(root).unwrap();
    }
3833
    /// Migrating a hand-built legacy schema (no `from_name`/`to_name` or source
    /// and target span columns on `edges`) must add those columns and the two
    /// name indexes.
    ///
    /// NOTE(review): `table_count` and `table_columns` are defined outside this
    /// section; `table_count` is also applied to index names here, so it
    /// presumably counts schema objects by name — confirm.
    #[test]
    fn migrate_adds_edge_name_columns_before_indexing_them() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join(".rag-rat")).unwrap();
        let database = root.join(".rag-rat/index.sqlite");
        let conn = rusqlite::Connection::open(&database).unwrap();
        // Legacy layout: four tables, no `schema_version`, minimal `edges`.
        conn.execute_batch(
            "
            CREATE TABLE files(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                path TEXT NOT NULL UNIQUE,
                language TEXT NOT NULL,
                kind TEXT NOT NULL,
                sha256 TEXT NOT NULL,
                modified_at_ms INTEGER NOT NULL,
                generated INTEGER NOT NULL DEFAULT 0,
                indexed_at_ms INTEGER NOT NULL
            );
            CREATE TABLE chunks(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                chunk_kind TEXT NOT NULL,
                symbol_path TEXT,
                start_byte INTEGER NOT NULL,
                end_byte INTEGER NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                text TEXT NOT NULL,
                text_hash TEXT NOT NULL
            );
            CREATE TABLE symbols(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                language TEXT NOT NULL,
                name TEXT NOT NULL,
                qualified_name TEXT NOT NULL,
                kind TEXT NOT NULL,
                start_byte INTEGER NOT NULL,
                end_byte INTEGER NOT NULL,
                signature TEXT,
                docs TEXT
            );
            CREATE TABLE edges(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                from_symbol_id INTEGER,
                to_symbol_id INTEGER,
                edge_kind TEXT NOT NULL,
                confidence TEXT NOT NULL
            );
            ",
        )
        .unwrap();
        drop(conn);

        let migrated = IndexDatabase::migrate(&database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        let db = IndexDatabase::open(&database).unwrap();
        let columns = table_columns(&db, "edges");
        assert!(columns.contains(&"from_name".to_string()));
        assert!(columns.contains(&"to_name".to_string()));
        assert!(columns.contains(&"source_start_line".to_string()));
        assert!(columns.contains(&"source_end_line".to_string()));
        assert!(columns.contains(&"source_start_byte".to_string()));
        assert!(columns.contains(&"source_end_byte".to_string()));
        assert!(columns.contains(&"target_start_line".to_string()));
        assert!(columns.contains(&"target_end_line".to_string()));
        // The indexes over the new name columns must exist as well.
        assert_eq!(table_count(&db, "idx_edges_from_name"), 1);
        assert_eq!(table_count(&db, "idx_edges_to_name"), 1);

        fs::remove_dir_all(root).unwrap();
    }
3906
    /// Re-running a migration must leave the cached GitHub data and its
    /// full-text index untouched.
    ///
    /// NOTE(review): `markdown_config`, `MockGitHubClient` and `row_count` are
    /// defined outside this section.
    #[test]
    fn migrate_preserves_github_papertrail_cache() {
        let (root, config) =
            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
        let db = IndexDatabase::rebuild(&config).unwrap();
        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
            .unwrap();
        // Baseline after syncing the single reference through the mock client.
        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        // Forget one applied migration so that `migrate` has work to do.
        db.storage
            .connection()
            .execute("DELETE FROM schema_version WHERE id = ?1", ["010_symbol_facts"])
            .unwrap();
        drop(db);

        let migrated = IndexDatabase::migrate(&config.database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        let db = IndexDatabase::open(&config.database).unwrap();
        // Same counts as before the migration.
        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        // The cached issue is still searchable.
        let hits = db.github_issue_search("sqlite", 10).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].number, 42);

        fs::remove_dir_all(root).unwrap();
    }
3943
    /// A second full rebuild must keep the GitHub data cached after the first one.
    ///
    /// NOTE(review): `markdown_config`, `MockGitHubClient` and `row_count` are
    /// defined outside this section.
    #[test]
    fn full_rebuild_preserves_github_papertrail_cache() {
        let (root, config) =
            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
        let db = IndexDatabase::rebuild(&config).unwrap();
        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
            .unwrap();
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        drop(db);

        // Rebuild from scratch over the same database file.
        let db = IndexDatabase::rebuild(&config).unwrap();

        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_ref_sync"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        // The cached issue is still searchable.
        let hits = db.github_issue_search("sqlite", 10).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].number, 42);

        fs::remove_dir_all(root).unwrap();
    }
3971
    /// A model installed before a full rebuild must still be reported as
    /// installed and `Ready` afterwards.
    ///
    /// NOTE(review): `markdown_config` is defined outside this section.
    #[test]
    fn full_rebuild_preserves_installed_model_manifest() {
        let (root, config) = markdown_config("alpha token with enough detail for embeddings\n");
        let db = IndexDatabase::rebuild(&config).unwrap();
        db.install_model(ai::HASH_MODEL_ID).unwrap();
        let before = db.local_ai_status().unwrap();
        assert_eq!(before.embedding.model_id, ai::HASH_MODEL_ID);
        assert!(before.embedding.installed);
        drop(db);

        // Rebuild from scratch over the same database file.
        let db = IndexDatabase::rebuild(&config).unwrap();

        let after = db.local_ai_status().unwrap();
        assert_eq!(after.embedding.model_id, ai::HASH_MODEL_ID);
        assert!(after.embedding.installed);
        assert_eq!(after.embedding.state, "Ready");

        fs::remove_dir_all(root).unwrap();
    }
3991
    /// A full rebuild must not delete rows that belong to a different worktree.
    ///
    /// The test plants a file, chunk, symbol and FTS row tagged with
    /// `other-worktree`, rebuilds, and checks that all four rows survive.
    ///
    /// NOTE(review): `source_config` is defined outside this section.
    #[test]
    fn full_rebuild_preserves_other_worktree_contexts() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn current_context() {}\n").unwrap();
        let config = source_config(root.clone(), Language::Rust);
        let db = IndexDatabase::rebuild(&config).unwrap();
        // Plant a file row owned by another worktree.
        let other_file_id = db
            .storage
            .connection()
            .query_row(
                "
                INSERT INTO main.files(
                    path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms,
                    indexed_revision, commit_sha, worktree_id
                )
                VALUES ('src/other.rs', 'rust', 'source', 'other-sha', 0, 0, 1, 'other-sha', '', 'other-worktree')
                RETURNING id
                ",
                [],
                |row| row.get::<_, i64>(0),
            )
            .unwrap();
        // Plant a chunk that hangs off that file.
        let other_chunk_id = db
            .storage
            .connection()
            .query_row(
                "
                INSERT INTO main.chunks(
                    file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line,
                    text, text_hash, source_revision, anchor_version, normalized_hash,
                    start_boundary_hash, end_boundary_hash, start_context_hash, end_context_hash,
                    context_radius, embedding_policy, embedding_priority
                )
                VALUES (?1, 'symbol', 'other_context', 0, 12, 1, 1, 'other context', 'other-text',
                    'other-sha', 1, '', '', '', '', '', 2, 'Embed', 1)
                RETURNING id
                ",
                [other_file_id],
                |row| row.get::<_, i64>(0),
            )
            .unwrap();
        // Plant a symbol for the same file.
        db.storage
            .connection()
            .execute(
                "
                INSERT INTO main.symbols(
                    file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs
                )
                VALUES (?1, 'rust', 'other_context', 'other_context', 'function', 0, 12, NULL, NULL)
                ",
                [other_file_id],
            )
            .unwrap();
        // Plant the full-text row keyed by the chunk id.
        db.storage
            .connection()
            .execute(
                "INSERT INTO main.chunk_fts(rowid, text) VALUES (?1, 'other context')",
                [other_chunk_id],
            )
            .unwrap();
        drop(db);

        let db = IndexDatabase::rebuild(&config).unwrap();

        // All four planted rows must still be present after the rebuild.
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.files WHERE worktree_id = 'other-worktree'",
                    [],
                    |row| row.get::<_, i64>(0)
                )
                .unwrap(),
            1
        );
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.chunks WHERE file_id = ?1",
                    [other_file_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.symbols WHERE file_id = ?1",
                    [other_file_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.chunk_fts WHERE rowid = ?1",
                    [other_chunk_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );

        fs::remove_dir_all(root).unwrap();
    }
4105
4106 #[test]
4107 fn compatible_open_refuses_dirty_and_newer_schema() {
4108 let root = unique_temp_root();
4109 let _ = fs::remove_dir_all(&root);
4110 fs::create_dir_all(root.join(".rag-rat")).unwrap();
4111 let database = root.join(".rag-rat/index.sqlite");
4112 let conn = rusqlite::Connection::open(&database).unwrap();
4113 conn.execute_batch(
4114 "
4115 CREATE TABLE schema_version(
4116 id TEXT PRIMARY KEY,
4117 applied_at_ms INTEGER NOT NULL,
4118 checksum TEXT NOT NULL,
4119 description TEXT NOT NULL
4120 );
4121 INSERT INTO schema_version(id, applied_at_ms, checksum, description)
4122 VALUES ('__dirty__', 1, '', 'partial migration in progress');
4123 ",
4124 )
4125 .unwrap();
4126 drop(conn);
4127
4128 let dirty = IndexDatabase::migration_check(&database).unwrap();
4129 assert_eq!(dirty.state, schema::SchemaState::Dirty);
4130 let err = IndexDatabase::open(&database).unwrap_err().to_string();
4131 assert!(err.contains("dirty or partial"), "{err}");
4132
4133 let conn = rusqlite::Connection::open(&database).unwrap();
4134 conn.execute_batch(
4135 "
4136 DELETE FROM schema_version;
4137 INSERT INTO schema_version(id, applied_at_ms, checksum, description)
4138 VALUES ('999_future_schema', 1, 'sha256:future', 'future schema');
4139 ",
4140 )
4141 .unwrap();
4142 drop(conn);
4143 let newer = IndexDatabase::migration_check(&database).unwrap();
4144 assert_eq!(newer.state, schema::SchemaState::Newer);
4145 let err = IndexDatabase::open(&database).unwrap_err().to_string();
4146 assert!(err.contains("newer rag-rat"), "{err}");
4147
4148 fs::remove_dir_all(root).unwrap();
4149 }
4150
4151 #[test]
4152 fn discover_mode_indexes_new_files_and_removes_deleted_files() {
4153 let root = unique_temp_root();
4154 let _ = fs::remove_dir_all(&root);
4155 fs::create_dir_all(root.join("src")).unwrap();
4156 fs::write(root.join("src/lib.rs"), "pub fn old_symbol() {}\n").unwrap();
4157 let config = source_config(root.clone(), Language::Rust);
4158 let db = IndexDatabase::rebuild(&config).unwrap();
4159 assert_eq!(db.discovery_status(&config).unwrap().unindexed_source_files, 0);
4160
4161 fs::write(root.join("src/new.rs"), "pub fn new_symbol() {}\n").unwrap();
4162 fs::remove_file(root.join("src/lib.rs")).unwrap();
4163 let drift = db.discovery_status(&config).unwrap();
4164 assert_eq!(drift.unindexed_source_files, 1);
4165 assert_eq!(drift.removed_indexed_files, 1);
4166 assert!(drift.warning.as_deref().unwrap().contains("rag-rat index --discover"));
4167
4168 let db = IndexDatabase::index_discover(&config).unwrap();
4169 let fresh = db.discovery_status(&config).unwrap();
4170 assert_eq!(fresh.unindexed_source_files, 0);
4171 assert_eq!(fresh.removed_indexed_files, 0);
4172 assert!(fresh.warning.is_none());
4173 assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4174 assert!(db.symbols("old_symbol", Some(Language::Rust), 10).unwrap().is_empty());
4175
4176 let mut events = Vec::new();
4177 let db = IndexDatabase::index_discover_with_progress(&config, |progress| {
4178 events.push(progress);
4179 })
4180 .unwrap();
4181 assert!(matches!(events.last(), Some(IndexProgress::Finished { files: 0 })));
4182 assert!(
4183 !events.iter().any(|event| matches!(
4184 event,
4185 IndexProgress::PreparingFile { .. } | IndexProgress::IndexingFile { .. }
4186 )),
4187 "no-op discover should not prepare or index files: {events:?}"
4188 );
4189 assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4190
4191 fs::remove_dir_all(root).unwrap();
4192 }
4193
4194 #[cfg(unix)]
4195 #[test]
4196 fn indexing_skips_symlink_loops() {
4197 let root = unique_temp_root();
4198 let _ = fs::remove_dir_all(&root);
4199 fs::create_dir_all(root.join("src")).unwrap();
4200 fs::write(root.join("src/lib.rs"), "pub fn loop_safe_symbol() {}\n").unwrap();
4201 std::os::unix::fs::symlink(&root, root.join("src/loop")).unwrap();
4202
4203 let config = source_config(root.clone(), Language::Rust);
4204 let db = IndexDatabase::rebuild(&config).unwrap();
4205
4206 assert_eq!(db.symbols("loop_safe_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4207
4208 fs::remove_dir_all(root).unwrap();
4209 }
4210
4211 #[test]
4212 fn dirty_git_files_are_indexed_as_worktree_overlay() {
4213 let root = unique_temp_root();
4214 let _ = fs::remove_dir_all(&root);
4215 let docs = root.join("docs");
4216 fs::create_dir_all(&docs).unwrap();
4217 fs::write(docs.join("search.md"), "# Title\nbase token\n").unwrap();
4218 run_git(&root, &["init"]);
4219 run_git(&root, &["add", "."]);
4220 run_git(
4221 &root,
4222 &[
4223 "-c",
4224 "user.name=Rag Rat Test",
4225 "-c",
4226 "user.email=rag-rat@example.invalid",
4227 "commit",
4228 "-m",
4229 "initial",
4230 ],
4231 );
4232
4233 let config = markdown_config_for_root(root.clone());
4234 let db = IndexDatabase::rebuild(&config).unwrap();
4235 assert_eq!(db.search("base", 10, false).unwrap().len(), 1);
4236
4237 fs::write(docs.join("search.md"), "# Title\noverlay token\n").unwrap();
4238 let db = IndexDatabase::index_changed(&config).unwrap();
4239 let scopes = db
4240 .storage
4241 .connection()
4242 .prepare(
4243 "
4244 SELECT commit_sha != '', worktree_id != ''
4245 FROM main.files
4246 WHERE path = 'docs/search.md'
4247 ORDER BY commit_sha != '' DESC, worktree_id != '' DESC
4248 ",
4249 )
4250 .unwrap()
4251 .query_map([], |row| Ok((row.get::<_, bool>(0)?, row.get::<_, bool>(1)?)))
4252 .unwrap()
4253 .collect::<Result<Vec<_>, _>>()
4254 .unwrap();
4255
4256 assert_eq!(scopes, vec![(true, false), (false, true)]);
4257 assert!(db.search("base", 10, false).unwrap().is_empty());
4258 let overlay_hits = db.search("overlay", 10, false).unwrap();
4259 assert_eq!(overlay_hits.len(), 1);
4260 assert!(overlay_hits[0].summary.contains("overlay token"));
4261
4262 fs::remove_dir_all(root).unwrap();
4263 }
4264
4265 #[test]
4266 fn rebuild_populates_revision_metadata_and_fresh_fts_state() {
4267 let (root, config) = markdown_config("alpha token");
4268 let db = IndexDatabase::rebuild(&config).unwrap();
4269 let status = db.status(&config.database).unwrap();
4270
4271 assert!(!status.content_revision.is_empty());
4272 assert_eq!(status.fts_source_revision.as_deref(), Some(status.content_revision.as_str()));
4273 assert_eq!(
4274 db.meta("content_revision").unwrap().as_deref(),
4275 Some(status.content_revision.as_str())
4276 );
4277 assert!(!status.fts_dirty);
4278 assert!(status.fts_fresh);
4279 assert!(!status.git_history.available);
4280 assert_eq!(status.git_history.commit_count, 0);
4281 assert_eq!(status.local_ai.embedding.state, "MissingModel");
4282 assert_eq!(status.local_ai.fastembed.backend, "fastembed");
4283 assert_eq!(status.local_ai.fastembed.model, ai::FASTEMBED_DISPLAY_MODEL);
4284 assert_eq!(status.local_ai.fastembed.dim, ai::FASTEMBED_EMBEDDING_DIM);
4285 assert!(!status.local_ai.fastembed.cache.is_empty());
4286 assert_eq!(status.local_ai.fastembed.build_feature_enabled, cfg!(feature = "fastembed"));
4287 assert_eq!(status.local_ai.artifacts.total_chunks, 1);
4288 assert_eq!(
4289 status.local_ai.artifacts.eligible_chunks + status.local_ai.artifacts.skipped_chunks,
4290 status.local_ai.artifacts.total_chunks
4291 );
4292 assert_eq!(
4293 status.local_ai.fastembed.eligible_embeddings
4294 + status.local_ai.fastembed.skipped_embeddings,
4295 status.local_ai.artifacts.total_chunks
4296 );
4297 assert_eq!(indexed_revision_count(&db), 1);
4298 assert_eq!(chunk_source_revision_count(&db), 1);
4299
4300 fs::remove_dir_all(root).unwrap();
4301 }
4302
4303 #[cfg(not(feature = "fastembed"))]
4304 #[test]
4305 fn fastembed_missing_feature_reports_rebuild_command() {
4306 let (root, config) = markdown_config("alpha token\n");
4307 let db = IndexDatabase::rebuild(&config).unwrap();
4308
4309 let err = db.install_model(ai::FASTEMBED_MODEL_ID).unwrap_err();
4310 assert!(err.to_string().contains(ai::FASTEMBED_MISSING_FEATURE_MESSAGE));
4311
4312 let status = db.local_ai_status().unwrap();
4313 assert!(!status.fastembed.build_feature_enabled);
4314 assert_eq!(status.fastembed.status, "MissingRuntime");
4315 assert_eq!(
4316 status.fastembed.message.as_deref(),
4317 Some(ai::FASTEMBED_MISSING_FEATURE_MESSAGE)
4318 );
4319 assert_eq!(status.fastembed.next.as_deref(), Some("cargo install rag-rat"));
4320
4321 fs::remove_dir_all(root).unwrap();
4322 }
4323
    /// Walks the embedding lifecycle for the hash model on a single-chunk index:
    /// reconcile is blocked until the model is installed, writes one embedding once it is,
    /// becomes a no-op afterwards, and re-processes the chunk when the stored source hash no
    /// longer matches.
    #[test]
    fn reconcile_requires_explicit_model_install_and_ignores_stale_artifacts() {
        let (root, config) = markdown_config(
            "alpha token\nsecond line with enough detail for the semantic embedding policy to keep this chunk\nthird line with runtime context\n",
        );
        let db = IndexDatabase::rebuild(&config).unwrap();
        let chunk_id = first_chunk_id(&db);

        // Fresh index: the hash model is listed but not installed.
        let models = db.list_models().unwrap();
        let embedding = models.iter().find(|model| model.model_id == ai::HASH_MODEL_ID).unwrap();
        assert!(!embedding.installed);
        assert_eq!(embedding.status, "MissingModel");

        // Search already works before any model is installed.
        let hits = db.search("alpha", 10, false).unwrap();
        assert_eq!(hits.len(), 1);
        assert!(hits[0].summary.contains("alpha token"));

        // Reconciling without an installed model does no work and reports "Blocked".
        let blocked = db.reconcile(Some(1), Some(8)).unwrap();
        assert_eq!(blocked.processed_chunks, 0);
        assert_eq!(blocked.embeddings_written, 0);
        assert_eq!(blocked.blocked_chunks, 0);
        assert_eq!(blocked.model_id, ai::HASH_MODEL_ID);
        assert_eq!(blocked.batch_size, 8);
        assert_eq!(blocked.status, "Blocked");

        let status = db.local_ai_status().unwrap();
        assert_eq!(status.embedding.state, "MissingModel");
        assert_eq!(status.embedding.blocked_artifacts, 0);

        // After an explicit install the plan shows one missing embedding and reconcile writes it.
        db.install_model(ai::HASH_MODEL_ID).unwrap();
        let plan = db.reconcile_plan().unwrap();
        assert_eq!(plan.embeddings.missing, 1);
        assert_eq!(plan.embeddings.current, 0);
        let current = db.reconcile(Some(1), Some(8)).unwrap();
        assert_eq!(current.embeddings_written, 1);
        assert_eq!(current.model_id, ai::HASH_MODEL_ID);
        assert_eq!(current.model_version, "hash-v1");
        assert_eq!(current.embedding_dim, ai::HASH_EMBEDDING_DIM);
        assert_eq!(current.status, "Current");
        assert_eq!(current.work_reasons.get("Missing"), Some(&1));
        // A second pass finds nothing left to do.
        let noop = db.reconcile(None, Some(8)).unwrap();
        assert_eq!(noop.processed_chunks, 0);
        assert_eq!(noop.embeddings_written, 0);
        let status = db.local_ai_status().unwrap();
        assert_eq!(status.embedding.state, "Ready");
        assert_eq!(status.embedding.current_artifacts, 1);
        // The stored blob is four bytes per dimension.
        // NOTE(review): presumably one f32 per dimension; confirm against the writer.
        let embedding_bytes: i64 = db
            .storage
            .connection()
            .query_row(
                "SELECT length(vector_blob) FROM chunk_embeddings WHERE chunk_id = ?1 AND status = 'Current'",
                [chunk_id],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(embedding_bytes, (ai::HASH_EMBEDDING_DIM * 4) as i64);

        let hits = db.search("alpha", 10, false).unwrap();
        assert!(hits[0].summary.contains("alpha token"));

        // With every FTS row deleted the chunk must still be found.
        // NOTE(review): presumably via the stored embedding; confirm in the search path.
        db.storage.connection().execute("DELETE FROM chunk_fts", []).unwrap();
        let vector_hits = db.search("alpha", 10, false).unwrap();
        assert_eq!(vector_hits.len(), 1);
        assert_eq!(vector_hits[0].chunk_id, chunk_id);

        // Overwrite the recorded source hash so the embedding no longer matches its chunk.
        db.storage
            .connection()
            .execute(
                "UPDATE chunk_embeddings SET source_text_hash = 'old-hash' WHERE chunk_id = ?1",
                [chunk_id],
            )
            .unwrap();
        // The plan must classify it as stale, and reconcile must redo it as "SourceChanged".
        let plan = db.reconcile_plan().unwrap();
        assert_eq!(plan.embeddings.current, 0);
        assert_eq!(plan.embeddings.stale, 1);
        let refreshed = db.reconcile(None, Some(8)).unwrap();
        assert_eq!(refreshed.processed_chunks, 1);
        assert_eq!(refreshed.work_reasons.get("SourceChanged"), Some(&1));
        assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 1);
        let stale_embedding_hits = db.search("alpha", 10, false).unwrap();
        assert_eq!(stale_embedding_hits.len(), 1);

        fs::remove_dir_all(root).unwrap();
    }
4408
4409 #[cfg(feature = "fastembed")]
4410 #[test]
4411 fn cached_fastembed_model_recovers_ready_state() {
4412 let (root, config) = markdown_config("alpha token\n");
4413 let db = IndexDatabase::rebuild(&config).unwrap();
4414 let cache_dir = root.join("models");
4415 let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4416 let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4417 fs::create_dir_all(repo.join("refs")).unwrap();
4418 fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4419 fs::write(repo.join("refs").join("main"), revision).unwrap();
4420
4421 ai::recover_cached_fastembed_model_at(db.storage.connection(), &cache_dir).unwrap();
4422
4423 let models = db.list_models().unwrap();
4424 let fastembed =
4425 models.iter().find(|model| model.model_id == ai::FASTEMBED_MODEL_ID).unwrap();
4426 assert!(fastembed.installed);
4427 assert_eq!(fastembed.status, "Ready");
4428 let status = db.local_ai_status().unwrap();
4429 assert_eq!(status.fastembed.status, "Ready");
4430 assert!(status.fastembed.active);
4431
4432 fs::remove_dir_all(root).unwrap();
4433 }
4434
4435 #[cfg(feature = "fastembed")]
4436 #[test]
4437 fn compatible_migrate_recovers_cached_fastembed_model() {
4438 let (root, config) = markdown_config("alpha token\n");
4439 let db = IndexDatabase::rebuild(&config).unwrap();
4440 let cache_dir = root.join("models");
4441 let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4442 let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4443 fs::create_dir_all(repo.join("refs")).unwrap();
4444 fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4445 fs::write(repo.join("refs").join("main"), revision).unwrap();
4446 db.storage
4447 .connection()
4448 .execute(
4449 "UPDATE ai_models
4450 SET installed = 0, status = 'MissingModel', installed_at_ms = NULL
4451 WHERE model_id = ?1",
4452 [ai::FASTEMBED_MODEL_ID],
4453 )
4454 .unwrap();
4455
4456 IndexDatabase::migrate_with_fastembed_cache(&config.database, Some(&cache_dir)).unwrap();
4457
4458 let db = IndexDatabase::open(&config.database).unwrap();
4459 let status = db.local_ai_status().unwrap();
4460 assert_eq!(status.fastembed.status, "Ready");
4461 assert!(status.fastembed.active);
4462
4463 fs::remove_dir_all(root).unwrap();
4464 }
4465
4466 #[test]
4467 fn reconcile_without_limit_processes_all_chunks() {
4468 let (root, config) = markdown_config(
4469 "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4470 );
4471 let db = IndexDatabase::rebuild(&config).unwrap();
4472 db.install_model(ai::HASH_MODEL_ID).unwrap();
4473
4474 let report = db.reconcile(None, Some(2)).unwrap();
4475
4476 assert_eq!(report.processed_chunks, 2);
4477 assert_eq!(report.embeddings_written, 2);
4478 assert_eq!(report.batch_size, 2);
4479 assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 2);
4480 let second = db.reconcile(None, Some(2)).unwrap();
4481 assert_eq!(second.processed_chunks, 0);
4482
4483 fs::remove_dir_all(root).unwrap();
4484 }
4485
4486 #[test]
4487 fn force_reconcile_processes_each_chunk_once_and_terminates() {
4488 let (root, config) = markdown_config(
4493 "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4494 );
4495 let db = IndexDatabase::rebuild(&config).unwrap();
4496 db.install_model(ai::HASH_MODEL_ID).unwrap();
4497
4498 let report = db.reconcile_with_progress(Some(50), Some(2), true, |_| {}).unwrap();
4500
4501 assert_eq!(report.embeddings_written, 2, "force re-embedded chunks: {report:?}");
4502 assert_eq!(report.processed_chunks, 2, "force re-processed chunks: {report:?}");
4503
4504 fs::remove_dir_all(root).unwrap();
4505 }
4506
4507 #[test]
4508 fn force_reconcile_progress_is_honest_and_terminates_without_limit() {
4509 let (root, config) = markdown_config(
4510 "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4511 );
4512 let db = IndexDatabase::rebuild(&config).unwrap();
4513 db.install_model(ai::HASH_MODEL_ID).unwrap();
4514
4515 let mut events = Vec::new();
4519 let report = db
4520 .reconcile_with_options_progress(
4521 ai::ReconcileOptions {
4522 force: true,
4523 batch_size: Some(1),
4524 max_seconds: Some(30),
4525 ..ai::ReconcileOptions::default()
4526 },
4527 |event| events.push(event),
4528 )
4529 .unwrap();
4530
4531 assert_eq!(report.status, "Current", "did not terminate naturally: {report:?}");
4532 assert_eq!(report.processed_chunks, 2);
4533
4534 let started_total = events.iter().find_map(|event| match event {
4535 ai::ReconcileProgress::Started { total_chunks, .. } => Some(*total_chunks),
4536 _ => None,
4537 });
4538 assert_eq!(started_total, Some(2), "denominator should equal the eligible set");
4539
4540 for event in &events {
4541 if let ai::ReconcileProgress::Batch { processed_chunks, total_chunks, .. } = event {
4542 assert!(
4543 processed_chunks <= total_chunks,
4544 "progress exceeded 100%: {processed_chunks}/{total_chunks}",
4545 );
4546 }
4547 }
4548
4549 fs::remove_dir_all(root).unwrap();
4550 }
4551
4552 #[test]
4553 fn status_counts_only_active_context_chunks() {
4554 let (root, config) = markdown_config(
4555 "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4556 );
4557 let mut db = IndexDatabase::rebuild(&config).unwrap();
4558 db.install_model(ai::HASH_MODEL_ID).unwrap();
4559
4560 let active = db.local_ai_status().unwrap().artifacts.total_chunks;
4561 assert!(active > 0, "expected active chunks, got {active}");
4562
4563 db.set_context("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef", "ghost-worktree").unwrap();
4567 let scoped = db.local_ai_status().unwrap().artifacts;
4568 assert_eq!(scoped.total_chunks, 0, "status ignored active context scope");
4569 assert_eq!(scoped.current, 0);
4570
4571 fs::remove_dir_all(root).unwrap();
4572 }
4573
4574 #[test]
4575 fn watch_maintenance_pass_indexes_new_files() {
4576 let root = unique_temp_root();
4578 let _ = fs::remove_dir_all(&root);
4579 fs::create_dir_all(root.join("src")).unwrap();
4580 fs::write(root.join("src/one.rs"), "pub fn one() {}\n").unwrap();
4581 let config = source_config(root.clone(), Language::Rust);
4582 IndexDatabase::rebuild(&config).unwrap();
4583
4584 fs::write(root.join("src/two.rs"), "pub fn newly_added_symbol() {}\n").unwrap();
4586 crate::watch::maintenance_pass(&config, false).unwrap();
4587
4588 let db = IndexDatabase::open_config(&config).unwrap();
4589 let hits = db.symbols("newly_added_symbol", Some(Language::Rust), 10).unwrap();
4590 assert!(!hits.is_empty(), "watcher pass did not index the new file");
4591
4592 fs::remove_dir_all(root).unwrap();
4593 }
4594
4595 #[test]
4596 fn discover_deletion_is_worktree_scoped() {
4597 let root = unique_temp_root();
4601 let _ = fs::remove_dir_all(&root);
4602 fs::create_dir_all(root.join("src")).unwrap();
4603 fs::write(root.join("src/a.rs"), "pub fn a() {}\n").unwrap();
4604 fs::write(root.join("src/b.rs"), "pub fn b() {}\n").unwrap();
4605 let config = source_config(root.clone(), Language::Rust);
4606 let db = IndexDatabase::rebuild(&config).unwrap();
4607
4608 db.storage
4610 .connection()
4611 .execute(
4612 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated,
4613 indexed_at_ms, indexed_revision, commit_sha, worktree_id)
4614 VALUES ('src/only_in_other.rs','rust','source','h',0,0,0,'rev','',
4615 'other-worktree')",
4616 [],
4617 )
4618 .unwrap();
4619 drop(db);
4620
4621 fs::remove_file(root.join("src/a.rs")).unwrap();
4623 let db = IndexDatabase::index_discover(&config).unwrap();
4624 let conn = db.storage.connection();
4625
4626 let other: i64 = conn
4628 .query_row(
4629 "SELECT COUNT(*) FROM main.files WHERE worktree_id = 'other-worktree' \
4630 AND kind != 'deleted'",
4631 [],
4632 |row| row.get(0),
4633 )
4634 .unwrap();
4635 assert_eq!(other, 1, "this worktree's pass deleted another worktree's row");
4636
4637 let active = |path: &str| -> i64 {
4640 conn.query_row("SELECT COUNT(*) FROM files WHERE path = ?1", [path], |row| row.get(0))
4641 .unwrap()
4642 };
4643 assert_eq!(active("src/a.rs"), 0, "deleted file still active in own worktree");
4644 assert_eq!(active("src/b.rs"), 1, "live file dropped from own worktree");
4645
4646 fs::remove_dir_all(root).unwrap();
4647 }
4648
4649 #[test]
4650 fn gc_prunes_dead_context_rows_and_keeps_live_ones() {
4651 let (root, config) = markdown_config(
4652 "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4653 );
4654 let db = IndexDatabase::rebuild(&config).unwrap();
4655 db.install_model(ai::HASH_MODEL_ID).unwrap();
4656 db.reconcile(None, Some(8)).unwrap();
4657
4658 let live_files = table_row_count(db.storage.connection(), "files").unwrap();
4659 let live_chunks = table_row_count(db.storage.connection(), "chunks").unwrap();
4660 assert!(live_files > 0 && live_chunks > 0);
4661
4662 db.storage
4664 .connection()
4665 .execute(
4666 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated,
4667 indexed_at_ms, indexed_revision, commit_sha, worktree_id)
4668 VALUES ('ghost.md','markdown','source','deadhash',0,0,0,'deadrev',
4669 'deadcommit','dead-worktree')",
4670 [],
4671 )
4672 .unwrap();
4673 assert_eq!(table_row_count(db.storage.connection(), "files").unwrap(), live_files + 1);
4674
4675 let live_worktree = db.active_worktree_id.clone();
4677 let report = db.prune_to_live(&[], &[live_worktree]).unwrap();
4678
4679 assert!(!report.skipped);
4680 assert_eq!(report.files_pruned, 1, "ghost not pruned: {report:?}");
4681 assert_eq!(
4682 table_row_count(db.storage.connection(), "files").unwrap(),
4683 live_files,
4684 "live files were pruned",
4685 );
4686 assert_eq!(
4687 table_row_count(db.storage.connection(), "chunks").unwrap(),
4688 live_chunks,
4689 "live chunks were pruned",
4690 );
4691
4692 fs::remove_dir_all(root).unwrap();
4693 }
4694
4695 #[test]
4696 fn gc_refuses_to_prune_with_no_live_context() {
4697 let (root, config) =
4698 markdown_config("# Only\nsome content with enough detail for a chunk\n");
4699 let db = IndexDatabase::rebuild(&config).unwrap();
4700 let before = table_row_count(db.storage.connection(), "files").unwrap();
4701 assert!(before > 0);
4702
4703 let report = db.prune_to_live(&[], &[]).unwrap();
4705 assert!(report.skipped);
4706 assert_eq!(report.files_pruned, 0);
4707 assert_eq!(table_row_count(db.storage.connection(), "files").unwrap(), before);
4708
4709 fs::remove_dir_all(root).unwrap();
4710 }
4711
4712 #[test]
4713 fn reconcile_treats_c_chunks_as_embedding_eligible() {
4714 let root = unique_temp_root();
4715 let _ = fs::remove_dir_all(&root);
4716 fs::create_dir_all(root.join("src")).unwrap();
4717 fs::write(
4718 root.join("src/main.c"),
4719 r#"
4720static int read_sensor_value(int baseline)
4721{
4722 int adjusted = baseline + 42;
4723 return adjusted;
4724}
4725
4726int main(void)
4727{
4728 int sample = read_sensor_value(7);
4729 return sample == 49 ? 0 : 1;
4730}
4731"#,
4732 )
4733 .unwrap();
4734 let config = source_config(root.clone(), Language::C);
4735 let db = IndexDatabase::rebuild(&config).unwrap();
4736 db.install_model(ai::HASH_MODEL_ID).unwrap();
4737
4738 let plan = db.reconcile_plan().unwrap();
4739
4740 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipLanguageUnsupported"), None);
4741 assert!(plan.embeddings.missing > 0, "plan: {:?}", plan.embeddings);
4742
4743 let report = db.reconcile(None, Some(8)).unwrap();
4744 assert!(report.embeddings_written > 0, "report: {report:?}");
4745
4746 fs::remove_dir_all(root).unwrap();
4747 }
4748
4749 #[test]
4750 fn reconcile_policy_skips_tiny_chunks_before_embedding() {
4751 let (root, config) = markdown_config("tiny\n");
4752 let db = IndexDatabase::rebuild(&config).unwrap();
4753 db.install_model(ai::HASH_MODEL_ID).unwrap();
4754
4755 let plan = db.reconcile_plan().unwrap();
4756 assert_eq!(plan.embeddings.missing, 0);
4757 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4758
4759 let report = db.reconcile(None, Some(8)).unwrap();
4760 assert_eq!(report.embeddings_written, 0);
4761 assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4762 assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 0);
4763
4764 fs::remove_dir_all(root).unwrap();
4765 }
4766
4767 #[test]
4768 fn reconcile_plan_reports_policy_skips_for_fastembed_model() {
4769 let (root, config) = markdown_config("tiny\n");
4770 let db = IndexDatabase::rebuild(&config).unwrap();
4771 db.storage
4772 .connection()
4773 .execute(
4774 "UPDATE ai_models
4775 SET installed = 1, disabled = 0, status = 'Ready', embedding_dim = ?2
4776 WHERE model_id = ?1",
4777 params![
4778 ai::FASTEMBED_MODEL_ID,
4779 i64::try_from(ai::FASTEMBED_EMBEDDING_DIM).unwrap()
4780 ],
4781 )
4782 .unwrap();
4783 db.storage
4784 .connection()
4785 .execute(
4786 "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4787 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4788 [ai::FASTEMBED_MODEL_ID],
4789 )
4790 .unwrap();
4791
4792 let plan = db.reconcile_plan().unwrap();
4793
4794 assert_eq!(plan.embeddings.model_id, ai::FASTEMBED_MODEL_ID);
4795 assert_eq!(plan.embeddings.missing, 0);
4796 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4797
4798 fs::remove_dir_all(root).unwrap();
4799 }
4800
4801 #[cfg(not(feature = "fastembed"))]
4802 #[test]
4803 fn blocked_fastembed_reconcile_still_reports_policy_skips() {
4804 let (root, config) = markdown_config("tiny\n");
4805 let db = IndexDatabase::rebuild(&config).unwrap();
4806 db.storage
4807 .connection()
4808 .execute(
4809 "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4810 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4811 [ai::FASTEMBED_MODEL_ID],
4812 )
4813 .unwrap();
4814
4815 let report = db.reconcile(None, Some(8)).unwrap();
4816
4817 assert_eq!(report.status, "Blocked");
4818 assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4819
4820 fs::remove_dir_all(root).unwrap();
4821 }
4822
4823 #[test]
4824 fn search_explain_reports_weighted_score_components() {
4825 let (root, config) = markdown_config(
4826 "alpha runtime shutdown\nsecond line with enough detail for embedding eligibility and semantic vector scoring\nthird line\n",
4827 );
4828 let db = IndexDatabase::rebuild(&config).unwrap();
4829 db.install_model(ai::HASH_MODEL_ID).unwrap();
4830 db.reconcile(None, Some(8)).unwrap();
4831
4832 let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4833
4834 assert_eq!(hits.len(), 1);
4835 let components = hits[0].score_components.as_ref().unwrap();
4836 let component_sum = components.bm25
4837 + components.vector
4838 + components.symbol
4839 + components.graph
4840 + components.git
4841 + components.github;
4842 assert!((hits[0].score - crate::query::round_score(component_sum)).abs() < 1e-9);
4844 assert!(components.bm25 > 0.0);
4845 assert!(components.vector > 0.0);
4846 assert!(components.vector_note.is_none());
4847 assert!(components.bm25 <= 0.45);
4848 assert!(components.vector <= 0.35);
4849 assert!(components.symbol <= 0.10);
4850 assert!(components.graph <= 0.05);
4851 assert!(components.git <= 0.03);
4852 assert!(components.github <= 0.02);
4853 assert!(db.search("runtime shutdown", 10, false).unwrap()[0].score_components.is_none());
4854
4855 fs::remove_dir_all(root).unwrap();
4856 }
4857
4858 #[test]
4859 fn search_explain_labels_missing_vector_runtime() {
4860 let (root, config) = markdown_config(
4861 "alpha runtime shutdown\nsecond line with enough detail for lexical search without embeddings\nthird line\n",
4862 );
4863 let db = IndexDatabase::rebuild(&config).unwrap();
4864
4865 let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4866
4867 assert_eq!(hits.len(), 1);
4868 let components = hits[0].score_components.as_ref().unwrap();
4869 assert!(components.bm25 > 0.0);
4870 assert_eq!(components.vector, 0.0);
4871 assert_eq!(
4872 components.vector_note.as_deref(),
4873 Some("vector search unavailable: no current embedding model")
4874 );
4875
4876 fs::remove_dir_all(root).unwrap();
4877 }
4878
4879 #[test]
4880 fn git_history_indexes_commits_paths_queries_and_blame() {
4881 let root = unique_temp_root();
4882 let _ = fs::remove_dir_all(&root);
4883 fs::create_dir_all(root.join("docs")).unwrap();
4884 fs::create_dir_all(root.join("src")).unwrap();
4885 run_git(&root, &["init"]);
4886 run_git(&root, &["config", "user.name", "Rag Rat"]);
4887 run_git(&root, &["config", "user.email", "rag@example.com"]);
4888
4889 fs::write(root.join("docs/search.md"), "# Title\nalpha token\n").unwrap();
4890 fs::write(root.join("src/lib.rs"), "pub fn tracked_symbol() {}\n").unwrap();
4891 run_git(&root, &["add", "."]);
4892 run_git(&root, &["commit", "-m", "Add alpha docs"]);
4893
4894 fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
4895 run_git(&root, &["add", "."]);
4896 run_git(&root, &["commit", "-m", "Refresh beta docs"]);
4897
4898 let config = Config {
4899 root: root.clone(),
4900 database: root.join(".rag-rat/index.sqlite"),
4901 targets: vec![
4902 ResolvedTarget {
4903 name: "markdown".to_string(),
4904 language: Language::Markdown,
4905 directories: vec![PathBuf::from("docs")],
4906 include: vec!["**/*.md".to_string()],
4907 exclude: Vec::new(),
4908 kind: TargetKind::Docs,
4909 },
4910 ResolvedTarget {
4911 name: "rust".to_string(),
4912 language: Language::Rust,
4913 directories: vec![PathBuf::from("src")],
4914 include: vec!["**/*.rs".to_string()],
4915 exclude: Vec::new(),
4916 kind: TargetKind::Source,
4917 },
4918 ],
4919 local_ai: Default::default(),
4920 watch: Default::default(),
4921 };
4922 let db = IndexDatabase::rebuild(&config).unwrap();
4923 let status = db.status(&config.database).unwrap();
4924 assert!(status.git_history.available);
4925 assert!(status.git_history.head.is_some());
4926 assert_eq!(status.git_history.indexed_head, status.git_history.head);
4927 assert_eq!(status.git_history.commit_count, 2);
4928 assert_eq!(status.git_history.file_change_count, 3);
4929
4930 let commit_hits = db.commit_search("beta", 10).unwrap();
4931 assert_eq!(commit_hits.len(), 1);
4932 assert_eq!(commit_hits[0].subject, "Refresh beta docs");
4933 assert_eq!(commit_hits[0].evidence_kind, "historical");
4934 assert!(commit_hits[0].score > 0.0);
4935
4936 let path_history = db.git_history_for_path("docs/search.md", 10).unwrap();
4937 assert_eq!(path_history.len(), 2);
4938 assert!(path_history.iter().all(|item| item.evidence_kind == "historical"));
4939
4940 let symbol_history =
4941 db.git_history_for_symbol("tracked_symbol", Some(Language::Rust), 10).unwrap();
4942 assert_eq!(symbol_history.len(), 1);
4943 assert_eq!(symbol_history[0].path, "src/lib.rs");
4944 assert_eq!(symbol_history[0].evidence_kind, "historical");
4945 let impact = db.impact_surface("tracked_symbol", 10).unwrap();
4946 assert!(impact.iter().any(|item| {
4947 item.category == "Direct structural impact" && item.reason == "exact_symbol_definition"
4948 }));
4949 assert!(impact.iter().any(|item| {
4950 item.category == "Historical/papertrail evidence"
4951 && item.reason == "git_commit_touched_file"
4952 }));
4953
4954 let query_commits = db.commits_touching_query("beta", 10).unwrap();
4955 let beta_commit =
4956 query_commits.iter().find(|hit| hit.subject == "Refresh beta docs").unwrap();
4957 assert!(beta_commit.evidence.iter().any(|value| value == "commit_message"));
4958 assert!(beta_commit.evidence.iter().any(|value| value == "file_change"));
4959 assert_eq!(beta_commit.evidence_kind, "historical");
4960
4961 let chunk_id = first_chunk_id(&db);
4962 let blame = db.git_blame_chunk(chunk_id).unwrap().unwrap();
4963 assert_eq!(blame.source_text_hash, hex_sha256("# Title\nbeta token\n".as_bytes()));
4964 assert_eq!(blame.line_count, 2);
4965 assert_eq!(blame.commit_counts.values().sum::<i64>(), 2);
4966 assert!(blame.dominant_commit_lines >= 1);
4967 assert!(blame.dominant_commit.is_some());
4968 assert_eq!(blame.evidence_kind, "historical");
4969 let cached = db.git_blame_chunk(chunk_id).unwrap().unwrap();
4970 assert_eq!(cached.source_text_hash, blame.source_text_hash);
4971
4972 fs::remove_dir_all(root).unwrap();
4973 }
4974
/// Indexes a small Rust file and checks the call, implements, and import edges it produces,
/// plus that `find_callers("helper")` reports `caller` through a `calls_name` edge.
#[test]
fn indexes_rust_graph_edges_from_tree_sitter() {
    let source = r#"
use crate::worker::Worker;
mod worker;

trait Service {
 fn serve(&self);
}

struct Worker;

impl Service for Worker {
 fn serve(&self) {
 helper();
 }
}

fn helper() {}

fn caller() {
 helper();
 Worker.serve();
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), source).unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let expected_edges = [
        ("caller", "helper", "calls_name"),
        ("Worker", "Service", "implements"),
        ("src/lib.rs", "worker", "imports"),
    ];
    for (from, to, kind) in expected_edges {
        assert_edge(&db, from, to, kind, "Syntactic");
    }

    let callers = db.find_callers("helper", 10).unwrap();
    let caller_listed = callers.iter().any(|hop| {
        hop.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
            && hop.edge_kind == "calls_name"
    });
    assert!(caller_listed, "helper callers: {callers:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5024
/// Checks how `ffi_surface` labels UniFFI exports: direct function exports, exported impl
/// blocks, and impl members each get their own reason, and a function that only mentions
/// `#[uniffi::export]` in a doc comment produces no row.
#[test]
fn ffi_surface_labels_exported_impl_members_separately() {
    let source = r#"
pub struct PhraseRepo;

#[uniffi::export]
impl PhraseRepo {
 pub fn children(&self) {}
 pub fn journal(&self) {}
}

#[cfg_attr(not(target_arch = "wasm32"), uniffi::export(async_runtime = "tokio"))]
impl Runtime {
 pub fn route_search_query(&self) {}
}

pub struct Runtime;

/// Not #[uniffi::export]: this is an internal helper.
pub fn internal_helper() {}

#[cfg_attr(target_arch = "wasm32", ::uniffi::export)]
pub fn exported_fn() {}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), source).unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let surface = db.ffi_surface(20).unwrap();

    // True when some row carries `reason` and its symbol ends with `suffix`.
    let labeled = |reason: &str, suffix: &str| {
        surface.iter().any(|row| {
            row.reason == reason
                && row.symbol.as_deref().is_some_and(|symbol| symbol.ends_with(suffix))
        })
    };

    assert!(
        labeled("rust_uniffi_export", "exported_fn"),
        "direct export should remain direct: {surface:?}"
    );
    assert!(
        surface.iter().any(|row| row.reason == "rust_uniffi_exported_impl"),
        "exported impl/type surface should be explicit: {surface:?}"
    );
    assert!(
        labeled("rust_uniffi_impl_member", "route_search_query"),
        "cfg_attr exported impl member should be labeled separately: {surface:?}"
    );
    assert!(
        labeled("rust_uniffi_impl_member", "children"),
        "impl member should be labeled separately: {surface:?}"
    );
    let member_reported_as_direct =
        labeled("rust_uniffi_export", "children") || labeled("rust_uniffi_export", "journal");
    assert!(
        !member_reported_as_direct,
        "impl members must not be reported as direct exports: {surface:?}"
    );
    let helper_listed = surface.iter().any(|row| {
        row.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("internal_helper"))
    });
    assert!(
        !helper_listed,
        "comment-only UniFFI mentions must not create FFI surface rows: {surface:?}"
    );

    fs::remove_dir_all(workspace).unwrap();
}
5106
/// Verifies that `find_callers("target")` reports callers whose call sits in a plain
/// statement, in a `let` initializer, and in a `let ... else` initializer.
#[test]
fn find_callers_sees_calls_in_let_bindings() {
    let source = concat!(
        "pub fn target() -> Option<i32> {\n Some(1)\n}\n\n",
        "pub fn via_statement() {\n target();\n}\n\n",
        "pub fn via_let() {\n let _x = target();\n}\n\n",
        "pub fn via_let_else() {\n let Some(_x) = target() else {\n return;\n };\n}\n",
    );
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), source).unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let callers = db.find_callers("target", 50).unwrap();
    let names: Vec<&str> = callers.iter().filter_map(|hop| hop.from_symbol.as_deref()).collect();
    let has = |suffix: &str| names.iter().any(|name| name.ends_with(suffix));

    assert!(has("via_statement"), "missing plain-statement caller; got {names:?}");
    assert!(has("via_let"), "missing `let x = target()` caller; got {names:?}");
    assert!(has("via_let_else"), "missing `let-else` caller; got {names:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5135
/// Checks the graph evidence attached to search hits and to `read_chunk` results.
///
/// Search hits are expected to carry compact evidence (a caller count plus top
/// callers/callees, with an empty `callers` list), while `read_chunk` returns a graph with the
/// symbol, its callees, and a note mentioning the tree-sitter/syntactic origin.
#[test]
fn search_and_read_chunk_attach_bounded_graph_evidence() {
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    let source = "pub fn helper() {}\n\npub fn caller() {\n helper();\n}\n";
    fs::write(workspace.join("src/lib.rs"), source).unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let hits = db.search("helper caller", 10, false).unwrap();
    // Locates the hit whose symbol path ends with the given suffix.
    let hit_for = |suffix: &str| {
        hits.iter()
            .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with(suffix)))
    };

    let helper_hit = hit_for("helper").expect("helper search hit");
    let helper_graph = helper_hit.graph.as_ref().expect("helper graph evidence");
    assert_eq!(helper_graph.caller_count, 1);
    let caller_in_top = helper_graph.top_callers.iter().any(|entry| {
        entry.symbol_path.ends_with("caller")
            && entry.callsite.line == 4
            && entry.callsite.span == [4, 4]
            && entry.confidence == "syntactic"
    });
    assert!(caller_in_top);
    assert!(helper_graph.callers.is_empty(), "search keeps graph compact");

    let caller_hit = hit_for("caller").expect("caller search hit");
    let caller_graph = caller_hit.graph.as_ref().expect("caller graph evidence");
    let helper_in_top = caller_graph.top_callees.iter().any(|entry| {
        entry.target == "helper"
            && entry.callsite.line == 4
            && entry.callsite.span == [4, 4]
            && entry.confidence == "syntactic"
    });
    assert!(helper_in_top);

    let chunk = db.read_chunk(caller_hit.chunk_id).unwrap().expect("caller chunk");
    let full_graph = chunk.graph.as_ref().expect("full read_chunk graph");
    assert!(full_graph.symbol.as_ref().is_some_and(|symbol| symbol.name == "caller"));
    let helper_callee_listed = full_graph
        .callees
        .iter()
        .any(|entry| entry.target == "helper" && entry.callsite.line == 4);
    assert!(helper_callee_listed);
    assert!(full_graph.notes.iter().any(|note| note.contains("tree-sitter/syntactic")));

    fs::remove_dir_all(workspace).unwrap();
}
5189
/// Checks that `GraphResolutionMode::Exact` only returns edges with a verified target symbol.
///
/// A bare exact lookup (no `symbol_id`) must return nothing, while exact lookups that supply
/// the symbol id must return the `caller -> helper` edge in both directions, with every
/// returned edge flagged `verified_target_symbol`.
#[test]
fn graph_exact_mode_requires_verified_symbol_identity() {
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(
        root.join("src/lib.rs"),
        "pub fn helper() {}\n\npub fn caller() {\n helper();\n}\n",
    )
    .unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Take the top-ranked symbol; `expect` names the missing symbol instead of the opaque
    // index panic that `Vec::remove(0)` raises on an empty result.
    let helper = db
        .symbols("helper", Some(Language::Rust), 10)
        .unwrap()
        .into_iter()
        .next()
        .expect("helper symbol should be indexed");
    let caller = db
        .symbols("caller", Some(Language::Rust), 10)
        .unwrap()
        .into_iter()
        .next()
        .expect("caller symbol should be indexed");

    // Exact-mode options pinned to one concrete symbol id.
    let exact_for = |symbol_id| GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(symbol_id),
        ..Default::default()
    };

    // Without a symbol id, exact mode must not fall back to name matching.
    let bare_options = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        ..Default::default()
    };
    let bare_exact = db.find_callers_with_options("helper", 10, &bare_options).unwrap();
    assert!(bare_exact.is_empty(), "bare exact lookup should not fall back: {bare_exact:?}");

    let exact_callers =
        db.find_callers_with_options("helper", 10, &exact_for(helper.symbol_id)).unwrap();
    assert!(
        exact_callers.iter().any(|edge| {
            edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
                && edge.verified_target_symbol
        }),
        "exact callers: {exact_callers:?}"
    );
    assert!(exact_callers.iter().all(|edge| edge.verified_target_symbol));

    let exact_callees =
        db.trace_callees_with_options("caller", 10, &exact_for(caller.symbol_id)).unwrap();
    assert!(
        exact_callees.iter().any(|edge| {
            edge.target.as_deref() == Some("helper") && edge.verified_target_symbol
        }),
        "exact callees: {exact_callees:?}"
    );
    assert!(exact_callees.iter().all(|edge| edge.verified_target_symbol));

    fs::remove_dir_all(root).unwrap();
}
5258
/// Checks that looking up `Database` ranks the `struct` definition first even though the
/// `impl Database` block appears earlier in the file, and that the impl is still returned.
#[test]
fn symbol_lookup_ranks_type_definitions_before_impl_blocks() {
    let source = r#"
impl Database {
 pub fn open() -> Self {
 Database
 }
}

pub struct Database;
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), source).unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let hits = db.symbols("Database", Some(Language::Rust), 10).unwrap();
    assert!(hits.len() >= 2, "fixture should expose both impl and struct symbols: {hits:?}");
    assert_eq!(hits[0].kind, "struct", "Database lookup should prefer type definition");
    let impl_listed = hits.iter().any(|symbol| symbol.kind == "impl");
    assert!(impl_listed, "impl Database should still be available after the struct: {hits:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5289
/// Checks that two `new` methods on different types get distinct logical symbol ids, are each
/// grouped with reason `"single"`, and keep the same logical ids after a second rebuild.
#[test]
fn distinct_same_named_methods_do_not_merge_and_logical_ids_are_stable() {
    use std::collections::BTreeSet;

    let source = r#"
pub struct A;
pub struct B;

impl A {
 pub fn new(name: String) -> Self { A }
}

impl B {
 pub fn new(count: usize, flag: bool) -> Self { B }
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), source).unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("new".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: true,
        limit: 10,
    };

    let first_lookup = db.symbol_candidates(&selector).unwrap();
    let constructors: Vec<_> =
        first_lookup.candidates.iter().filter(|entry| entry.name == "new").collect();
    assert_eq!(constructors.len(), 2, "both constructors present: {constructors:?}");
    let logical_ids: BTreeSet<i64> =
        constructors.iter().filter_map(|entry| entry.logical_symbol_id).collect();
    assert_eq!(logical_ids.len(), 2, "distinct signatures get distinct logical ids");
    constructors.iter().for_each(|candidate| {
        assert_eq!(
            candidate.logical_group_reason.as_deref(),
            Some("single"),
            "differently-signed methods are not cfg variants: {candidate:?}"
        );
    });

    // Rebuild a second time (the first handle stays alive, as before) and compare ids.
    let db = IndexDatabase::rebuild(&config).unwrap();
    let second_lookup = db.symbol_candidates(&selector).unwrap();
    let reindexed_ids: BTreeSet<i64> = second_lookup
        .candidates
        .iter()
        .filter(|entry| entry.name == "new")
        .filter_map(|entry| entry.logical_symbol_id)
        .collect();
    assert_eq!(reindexed_ids, logical_ids, "logical ids must be stable across reindex");

    fs::remove_dir_all(workspace).unwrap();
}
5354
/// Checks exact-mode caller lookup for a function defined twice under complementary `cfg`s.
///
/// Both `spawn_blocking` definitions are expected to share one logical symbol (variant count
/// 2, group reason `"cfg_variant"`). An exact lookup pinned to either variant's `symbol_id`
/// must still report `caller`, and `graph_traversal_report` must expose the logical symbol,
/// both variants, and only verified edges.
#[test]
fn logical_symbol_exact_mode_covers_duplicate_rust_variants() {
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(
        root.join("src/lib.rs"),
        r#"
#[cfg(not(target_arch = "wasm32"))]
pub fn spawn_blocking() {}

#[cfg(target_arch = "wasm32")]
pub fn spawn_blocking() {}

pub fn caller() {
 spawn_blocking();
}
"#,
    )
    .unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();
    let lookup = db
        .symbol_candidates(&crate::query::symbol::SymbolSelector {
            logical_symbol_id: None,
            symbol_id: None,
            symbol_path: None,
            symbol: Some("spawn_blocking".to_string()),
            language: Some(Language::Rust),
            allow_ambiguous: true,
            limit: 10,
        })
        .unwrap();

    // Both variants are indexed below; fail with the candidate list rather than a bare
    // out-of-bounds panic if either one is missing.
    assert!(
        lookup.candidates.len() >= 2,
        "expected both cfg variants of spawn_blocking as candidates: {:?}",
        lookup.candidates
    );
    let first_variant = &lookup.candidates[0];
    let second_variant = &lookup.candidates[1];

    let logical_symbol_id = first_variant.logical_symbol_id.expect("logical id");
    assert_eq!(first_variant.logical_variant_count, Some(2));
    assert_eq!(first_variant.logical_group_reason.as_deref(), Some("cfg_variant"));

    // Exact-mode options pinned to one concrete symbol id.
    let exact_for = |symbol_id| GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(symbol_id),
        ..Default::default()
    };

    let exact_variant_callers = db
        .find_callers_with_options("spawn_blocking", 10, &exact_for(second_variant.symbol_id))
        .unwrap();
    assert!(
        exact_variant_callers.iter().any(|edge| {
            edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
                && edge.target.as_deref() == Some("spawn_blocking")
                && edge.verified_target_symbol
        }),
        "symbol_id exact should include its logical cfg group: {exact_variant_callers:?}"
    );
    assert!(exact_variant_callers.iter().all(|edge| edge.verified_target_symbol));

    let exact_logical = db
        .graph_traversal_report(
            "find_callers",
            first_variant,
            true,
            10,
            &exact_for(first_variant.symbol_id),
        )
        .unwrap();
    assert_eq!(exact_logical.query.logical_symbol_id, Some(logical_symbol_id));
    assert_eq!(
        exact_logical.logical_symbol.as_ref().map(|symbol| symbol.variant_count),
        Some(2)
    );
    assert_eq!(exact_logical.variants.len(), 2);
    assert!(exact_logical.results.iter().all(|edge| edge.verified_target_symbol));
    assert!(
        exact_logical.results.iter().any(|edge| {
            edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
                && edge.target.as_deref() == Some("spawn_blocking")
        }),
        "logical exact callers: {exact_logical:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
5443
/// Indexes the `graph-realworld/rust` fixture and checks a table of expected edges, then
/// verifies that default `find_callers("serve")` is empty while the fuzzy resolution mode
/// reports `drive` through a `calls_name` edge.
#[test]
fn indexes_real_world_rust_graph_patterns() {
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};

    let workspace = fixture_temp_root("graph-realworld/rust");
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // (from, to, edge kind, confidence)
    let expected_edges = [
        ("src/lib.rs", "worker", "imports", "Syntactic"),
        ("src/lib.rs", "Worker", "exports", "Syntactic"),
        ("entry", "new", "calls_name", "NameOnly"),
        ("entry", "Client", "references_type", "Syntactic"),
        ("drive", "serve", "calls_name", "NameOnly"),
        ("drive", "GenericRunner", "references_type", "Syntactic"),
        ("Worker", "Service", "implements", "Syntactic"),
        ("generic_call", "T", "references_type", "NameOnly"),
        ("entry", "generated_call", "uses_macro", "NameOnly"),
    ];
    for (from, to, kind, confidence) in expected_edges {
        assert_edge(&db, from, to, kind, confidence);
    }

    let syntactic_callers = db.find_callers("serve", 10).unwrap();
    assert!(
        syntactic_callers.is_empty(),
        "syntactic serve callers should avoid receiver/name fallback: {syntactic_callers:?}"
    );

    let fuzzy = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Fuzzy,
        ..Default::default()
    };
    let callers = db.find_callers_with_options("serve", 10, &fuzzy).unwrap();
    let drive_listed = callers.iter().any(|hop| {
        hop.edge_kind == "calls_name"
            && hop.edge_confidence == hop.confidence
            && hop.from_symbol.as_deref().is_some_and(|name| name.ends_with("drive"))
    });
    assert!(drive_listed, "serve callers: {callers:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5485
/// Indexes a TypeScript helper module and a TSX file that imports it, then checks the call,
/// type-reference, import, and export edges and that `callRun` traces to `run`.
#[test]
fn indexes_typescript_graph_edges_from_tree_sitter() {
    let helper_source = "export function helper() {}\nexport const Card = () => null;\n";
    let app_source = r#"
import { helper, Card } from "./helper";

export function run() {
 helper();
 return <Card />;
}

export const callRun = () => run();
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/helper.ts"), helper_source).unwrap();
    fs::write(workspace.join("src/App.tsx"), app_source).unwrap();
    let config = source_config(workspace.clone(), Language::TypeScript);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let expected_edges = [
        ("run", "helper", "calls_name"),
        ("run", "Card", "references_type"),
        ("src/App.tsx", "helper", "imports"),
        ("src/App.tsx", "run", "exports"),
    ];
    for (from, to, kind) in expected_edges {
        assert_edge(&db, from, to, kind, "Syntactic");
    }

    let callees = db.trace_callees("callRun", 10).unwrap();
    let run_listed = callees.iter().any(|hop| {
        hop.to_symbol.as_deref().is_some_and(|name| name.ends_with("run"))
            && hop.confidence == "syntactic"
    });
    assert!(run_listed, "callRun callees: {callees:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5528
/// Indexes a small C file and checks that `runtime_open` has a syntactic `calls_name` edge
/// to `helper`.
#[test]
fn indexes_c_graph_edges_from_tree_sitter() {
    let source = r#"
typedef struct Runtime Runtime;

struct Runtime {
 int state;
};

int helper(Runtime *runtime) {
 return runtime->state;
}

int runtime_open(Runtime *runtime) {
 return helper(runtime);
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/runtime.c"), source).unwrap();

    let config = source_config(workspace.clone(), Language::C);
    let db = IndexDatabase::rebuild(&config).unwrap();
    assert_edge(&db, "runtime_open", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(workspace).unwrap();
}
5560
/// Indexes a C driver file that uses file-scope macros and checks that searching for
/// `DEVICE_API` returns a hit in that file whose summary mentions the macro.
#[test]
fn indexes_c_file_scope_macro_regions_for_search() {
    let source = r#"
static int entropy_init(const struct device *dev)
{
 ARG_UNUSED(dev);
 return 0;
}

/* Entropy driver APIs structure */
static DEVICE_API(entropy, entropy_cryptoacc_trng_api) = {
 .get_entropy = entropy_cryptoacc_trng_get_entropy,
};

DEVICE_DT_INST_DEFINE(0, entropy_init, NULL, NULL, NULL,
 PRE_KERNEL_1, CONFIG_ENTROPY_INIT_PRIORITY,
 &entropy_cryptoacc_trng_api);
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("drivers/entropy")).unwrap();
    fs::write(workspace.join("drivers/entropy/entropy.c"), source).unwrap();

    let c_target = ResolvedTarget {
        name: "c".to_string(),
        language: Language::C,
        directories: vec![PathBuf::from("drivers/entropy")],
        include: vec!["**/*.c".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: workspace.clone(),
        database: workspace.join(".rag-rat/index.sqlite"),
        targets: vec![c_target],
        local_ai: Default::default(),
        watch: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    let hits = db.search("DEVICE_API", 5, false).unwrap();
    let macro_region_found = hits.iter().any(|hit| {
        hit.path == "drivers/entropy/entropy.c" && hit.summary.contains("DEVICE_API")
    });
    assert!(macro_region_found, "DEVICE_API hits: {hits:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5612
/// Indexes a small C++ file and checks that the out-of-line `Runtime::open` definition has a
/// syntactic `calls_name` edge from `open` to `helper`.
#[test]
fn indexes_cpp_graph_edges_from_tree_sitter() {
    let source = r#"
namespace held {
class Runtime {
public:
 void open();
};

void helper() {}

void Runtime::open() {
 helper();
}
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/runtime.cpp"), source).unwrap();

    let config = source_config(workspace.clone(), Language::Cpp);
    let db = IndexDatabase::rebuild(&config).unwrap();
    assert_edge(&db, "open", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(workspace).unwrap();
}
5643
/// Indexes the `graph-realworld/typescript` fixture and checks a table of expected edges,
/// then verifies that tracing `Shell` with references included reports a `references_type`
/// edge to `DefaultWidget`.
#[test]
fn indexes_real_world_typescript_graph_patterns() {
    use crate::query::graph::GraphTraversalOptions;

    let workspace = fixture_temp_root("graph-realworld/typescript");
    let config = source_config(workspace.clone(), Language::TypeScript);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // (from, to, edge kind, confidence)
    let expected_edges = [
        ("src/lib.tsx", "DefaultWidget", "imports", "Syntactic"),
        ("src/lib.tsx", "WidgetNS", "imports", "NameOnly"),
        ("src/lib.tsx", "WidgetProps", "imports", "Syntactic"),
        ("src/lib.tsx", "ReExportedWidget", "exports", "NameOnly"),
        ("useWidget", "useMemo", "calls_name", "NameOnly"),
        ("useWidget", "DefaultWidget", "calls_name", "Syntactic"),
        ("Shell", "renderWidget", "calls_name", "NameOnly"),
        ("Shell", "WidgetNS", "references_type", "NameOnly"),
        ("Shell", "DefaultWidget", "references_type", "Syntactic"),
        ("DefaultWidget", "WidgetProps", "references_type", "Syntactic"),
    ];
    for (from, to, kind, confidence) in expected_edges {
        assert_edge(&db, from, to, kind, confidence);
    }

    let with_references = GraphTraversalOptions {
        include_references: true,
        edge_kinds: None,
        ..Default::default()
    };
    let callees = db.trace_callees_with_options("Shell", 10, &with_references).unwrap();
    let widget_reference_listed = callees.iter().any(|hop| {
        hop.edge_kind == "references_type"
            && hop.edge_confidence == hop.confidence
            && hop.to_symbol.as_deref().is_some_and(|name| name.ends_with("DefaultWidget"))
    });
    assert!(widget_reference_listed, "Shell callees: {callees:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5682
/// Checks that a `format!` invocation next to a `mod format;` declaration is stored as an
/// unresolved, name-only `uses_macro` edge with no target symbol id.
#[test]
fn rust_macro_edges_do_not_resolve_to_same_named_modules() {
    let lib_source = r#"
mod format;

fn execute_one() {
 let _value = format!("hello");
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();
    fs::write(workspace.join("src/format.rs"), "pub fn helper() {}\n").unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let sql = "
 SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
 FROM edges
 WHERE edge_kind = 'uses_macro'
 AND to_name = 'format'
 ";
    let (edge_kind, to_name, to_symbol_id, confidence, resolution, evidence) = db
        .storage
        .connection()
        .query_row(sql, [], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, Option<i64>>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, Option<String>>(5)?,
            ))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_name, "format");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|text| text.contains("format!")));

    fs::remove_dir_all(workspace).unwrap();
}
5735
/// Checks that reopening a database whose `graph_index_version` is stale restores macro edges.
///
/// After a rebuild the test sets the stored version to `'old'` and rewrites the `format`
/// edge as a resolved `calls_name` edge. Opening the database again must yield an
/// unresolved, name-only `uses_macro` edge for `format`.
#[test]
fn opening_old_graph_policy_rebuilds_stale_macro_edges() {
    let lib_source = r#"
mod format;

fn execute_one() {
 let _value = format!("hello");
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();
    fs::write(workspace.join("src/format.rs"), "pub fn helper() {}\n").unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Tamper with the stored state: stale version marker plus a wrongly resolved edge.
    let mark_version_old =
        "UPDATE index_meta SET value = 'old' WHERE key = 'graph_index_version'";
    db.storage.connection().execute(mark_version_old, []).unwrap();
    let corrupt_format_edge = "
 UPDATE edges
 SET edge_kind = 'calls_name',
 to_symbol_id = (SELECT id FROM symbols WHERE name = 'format' LIMIT 1),
 confidence = 'Syntactic',
 evidence = NULL,
 resolution = 'syntactic'
 WHERE to_name = 'format'
 ";
    db.storage.connection().execute(corrupt_format_edge, []).unwrap();
    drop(db);

    let reopened = IndexDatabase::open(&config.database).unwrap();
    let select_macro_edge = "
 SELECT edge_kind, to_symbol_id, confidence, resolution, evidence
 FROM edges
 WHERE to_name = 'format'
 AND edge_kind = 'uses_macro'
 ";
    let (edge_kind, to_symbol_id, confidence, resolution, evidence) = reopened
        .storage
        .connection()
        .query_row(select_macro_edge, [], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, Option<i64>>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, Option<String>>(4)?,
            ))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|text| text.contains("format!")));

    fs::remove_dir_all(workspace).unwrap();
}
5807
/// Checks that a `Vec::new()` call is not resolved to the local `AlertsStore::new`: the stored
/// edge keeps the qualified name `Vec::new`, has no target symbol id, and is unresolved.
#[test]
fn qualified_common_member_calls_do_not_resolve_by_short_name() {
    let source = r#"
pub struct AlertsStore;

impl AlertsStore {
 pub fn new() -> Self {
 Self
 }
}

pub fn caller() {
 let _items: Vec<String> = Vec::new();
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), source).unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let sql = "
 SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution
 FROM edges
 WHERE from_name LIKE '%caller'
 AND edge_kind = 'calls_name'
 AND to_name = 'new'
 ";
    let (to_name, qualified_name, to_symbol_id, confidence, resolution) = db
        .storage
        .connection()
        .query_row(sql, [], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, Option<String>>(1)?,
                row.get::<_, Option<i64>>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, String>(4)?,
            ))
        })
        .unwrap();

    assert_eq!(to_name, "new");
    assert_eq!(qualified_name.as_deref(), Some("Vec::new"));
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");

    fs::remove_dir_all(workspace).unwrap();
}
5864
/// Checks that a Rust `json!` invocation is not resolved to a TypeScript function named
/// `json` indexed from the same directory: the edge stays an unresolved, name-only
/// `uses_macro` edge.
#[test]
fn macro_edges_do_not_resolve_to_same_named_typescript_symbols() {
    let rust_source = r#"
fn rust_entry() {
 let _payload = json!({"ok": true});
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), rust_source).unwrap();
    fs::write(workspace.join("src/preferences.ts"), "export function json() { return {}; }\n")
        .unwrap();

    // Index the TypeScript file alongside the Rust target.
    let typescript_target = ResolvedTarget {
        name: "typescript".to_string(),
        language: Language::TypeScript,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.ts".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let mut config = source_config(workspace.clone(), Language::Rust);
    config.targets.push(typescript_target);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let sql = "
 SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
 FROM edges
 WHERE edge_kind = 'uses_macro'
 AND to_name = 'json'
 ";
    let (edge_kind, to_name, to_symbol_id, confidence, resolution, evidence) = db
        .storage
        .connection()
        .query_row(sql, [], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, Option<i64>>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, Option<String>>(5)?,
            ))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_name, "json");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|text| text.contains("json!")));

    fs::remove_dir_all(workspace).unwrap();
}
5924
/// Checks that path-qualified calls to `spawn_blocking` are found by `find_callers`: the
/// `crate::task_spawn::...` call from `first` via the `target_name_fallback` resolution, and
/// the `task_spawn::...` call from `second` as a `calls_name` edge.
#[test]
fn qualified_crate_helper_callers_use_name_fallback() {
    let source = r#"
pub mod task_spawn {
 pub fn spawn_blocking() {}
}

pub fn first() {
 crate::task_spawn::spawn_blocking();
}

pub fn second() {
 task_spawn::spawn_blocking();
}
"#;
    let workspace = unique_temp_root();
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();
    fs::write(workspace.join("src/lib.rs"), source).unwrap();
    let config = source_config(workspace.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let callers = db.find_callers("spawn_blocking", 10).unwrap();

    let first_via_fallback = callers.iter().any(|hop| {
        hop.from_symbol.as_deref().is_some_and(|name| name.ends_with("first"))
            && hop.edge_kind == "calls_name"
            && hop.resolution == "target_name_fallback"
    });
    assert!(first_via_fallback, "spawn_blocking callers: {callers:?}");

    let second_listed = callers.iter().any(|hop| {
        hop.from_symbol.as_deref().is_some_and(|name| name.ends_with("second"))
            && hop.edge_kind == "calls_name"
    });
    assert!(second_listed, "spawn_blocking callers: {callers:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5969
5970 #[test]
5971 fn caller_lookup_does_not_match_related_names_or_chain_evidence() {
5972 let root = unique_temp_root();
5973 let _ = fs::remove_dir_all(&root);
5974 fs::create_dir_all(root.join("src")).unwrap();
5975 fs::write(
5976 root.join("src/lib.rs"),
5977 r#"
5978pub mod runtime {
5979 pub mod task_spawn {
5980 pub fn spawn() {}
5981 pub fn spawn_blocking() -> JoinHandle {
5982 JoinHandle
5983 }
5984 pub fn spawn_blocking_handle() {}
5985 pub fn spawn_blocking_offload() -> JoinHandle {
5986 JoinHandle
5987 }
5988 }
5989}
5990
5991pub struct JoinHandle;
5992
5993impl JoinHandle {
5994 pub fn map_err(self) {}
5995}
5996
5997pub fn direct() {
5998 crate::runtime::task_spawn::spawn_blocking();
5999}
6000
6001pub fn related_handle() {
6002 crate::runtime::task_spawn::spawn_blocking_handle();
6003}
6004
6005pub fn related_offload_chain() {
6006 crate::runtime::task_spawn::spawn_blocking_offload().map_err();
6007}
6008
6009pub fn related_spawn_with_text() {
6010 crate::runtime::task_spawn::spawn();
6011}
6012"#,
6013 )
6014 .unwrap();
6015 let config = source_config(root.clone(), Language::Rust);
6016 let db = IndexDatabase::rebuild(&config).unwrap();
6017
6018 let callers = db.find_callers("spawn_blocking", 20).unwrap();
6019 assert!(
6020 callers.iter().any(|edge| {
6021 edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
6022 && edge.target.as_deref() == Some("spawn_blocking")
6023 && edge.edge_kind == "calls_name"
6024 }),
6025 "spawn_blocking callers: {callers:?}"
6026 );
6027 assert!(
6028 callers.iter().all(|edge| {
6029 !edge.from_symbol.as_deref().is_some_and(|name| {
6030 name.ends_with("related_handle")
6031 || name.ends_with("related_offload_chain")
6032 || name.ends_with("related_spawn_with_text")
6033 }) && !matches!(
6034 edge.target.as_deref(),
6035 Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
6036 )
6037 }),
6038 "caller lookup leaked related names or chain evidence: {callers:?}"
6039 );
6040
6041 let qualified_callers = db.find_callers("src/lib.rs::spawn_blocking", 20).unwrap();
6042 assert!(
6043 qualified_callers.iter().any(|edge| {
6044 edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
6045 && edge.target.as_deref() == Some("spawn_blocking")
6046 && edge.edge_kind == "calls_name"
6047 }),
6048 "qualified spawn_blocking callers: {qualified_callers:?}"
6049 );
6050 assert!(
6051 qualified_callers.iter().all(|edge| {
6052 !edge.from_symbol.as_deref().is_some_and(|name| {
6053 name.ends_with("related_handle")
6054 || name.ends_with("related_offload_chain")
6055 || name.ends_with("related_spawn_with_text")
6056 }) && !matches!(
6057 edge.target.as_deref(),
6058 Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
6059 )
6060 }),
6061 "qualified caller lookup leaked related names or chain evidence: {qualified_callers:?}"
6062 );
6063
6064 fs::remove_dir_all(root).unwrap();
6065 }
6066
6067 #[test]
6068 fn files_past_the_old_structural_cap_still_contribute_symbols_and_edges() {
6069 let root = unique_temp_root();
6070 let _ = fs::remove_dir_all(&root);
6071 fs::create_dir_all(root.join("src")).unwrap();
6072 let filler =
6073 (0..700).map(|idx| format!("pub fn filler_{idx}() {{}}\n")).collect::<String>();
6074 fs::write(
6075 root.join("src/lib.rs"),
6076 format!(
6077 r#"
6078pub mod task_spawn {{
6079 pub fn spawn_blocking() {{}}
6080}}
6081
6082{filler}
6083
6084pub fn caller() {{
6085 crate::task_spawn::spawn_blocking();
6086}}
6087"#
6088 ),
6089 )
6090 .unwrap();
6091 let config = source_config(root.clone(), Language::Rust);
6092 assert!(fs::metadata(root.join("src/lib.rs")).unwrap().len() > 10_000);
6093 let db = IndexDatabase::rebuild(&config).unwrap();
6094
6095 let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
6096 assert!(
6097 symbols.iter().any(|symbol| symbol.name == "caller"),
6098 "caller symbols: {symbols:?}"
6099 );
6100 let callers = db.find_callers("spawn_blocking", 10).unwrap();
6101 assert!(
6102 callers.iter().any(|edge| {
6103 edge.edge_kind == "calls_name"
6104 && edge.target.as_deref() == Some("spawn_blocking")
6105 && edge.callsite.as_ref().is_some_and(|callsite| callsite.line > 700)
6106 }),
6107 "spawn_blocking callers: {callers:?}"
6108 );
6109 let impact =
6110 db.impact_surface("callers of crate::task_spawn::spawn_blocking in src", 10).unwrap();
6111 assert!(
6112 impact.iter().any(|item| {
6113 item.category == "Direct structural impact" && item.reason == "direct_caller"
6114 }),
6115 "impact: {impact:?}"
6116 );
6117
6118 fs::remove_dir_all(root).unwrap();
6119 }
6120
6121 #[test]
6122 fn impact_surface_uses_high_signal_query_symbols_and_call_edges() {
6123 let root = unique_temp_root();
6124 let _ = fs::remove_dir_all(&root);
6125 fs::create_dir_all(root.join("src")).unwrap();
6126 fs::write(
6127 root.join("src/lib.rs"),
6128 r#"
6129pub mod runtime {
6130 pub fn unrelated_runtime_symbol() {}
6131}
6132
6133pub mod task_spawn {
6134 pub fn spawn_blocking<F, T>(f: F) -> T
6135 where
6136 F: FnOnce() -> T + Send + 'static,
6137 T: Send + 'static,
6138 {
6139 f()
6140 }
6141}
6142
6143pub fn caller() {
6144 crate::task_spawn::spawn_blocking(|| 1);
6145}
6146"#,
6147 )
6148 .unwrap();
6149 let config = source_config(root.clone(), Language::Rust);
6150 let db = IndexDatabase::rebuild(&config).unwrap();
6151 let impact = db
6152 .impact_surface(
6153 "change runtime task_spawn spawn_blocking wasm inline native blocking pool",
6154 20,
6155 )
6156 .unwrap();
6157 assert!(
6158 impact.iter().any(|item| {
6159 item.category == "Direct structural impact"
6160 && item.reason == "direct_caller"
6161 && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
6162 }),
6163 "spawn_blocking caller should be present: {impact:?}"
6164 );
6165 assert!(
6166 impact.iter().all(|item| {
6167 !(item.reason == "exact_symbol_definition"
6168 && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("runtime")))
6169 }),
6170 "broad `runtime` token should not become an exact impact seed: {impact:?}"
6171 );
6172 assert!(
6173 impact.iter().all(|item| {
6174 !item.evidence.iter().any(|evidence| evidence.contains("references_type"))
6175 && item.symbol.as_deref() != Some("Send")
6176 }),
6177 "type references should not appear as direct impact: {impact:?}"
6178 );
6179
6180 fs::remove_dir_all(root).unwrap();
6181 }
6182
6183 #[test]
6184 fn impact_surface_collapses_file_matches_to_one_row_per_file() {
6185 let root = unique_temp_root();
6188 let _ = fs::remove_dir_all(&root);
6189 fs::create_dir_all(root.join("src")).unwrap();
6190 fs::write(
6191 root.join("src/widget_store.rs"),
6192 "pub fn widget_alpha() {}\npub fn widget_beta() {}\n\
6193 pub fn widget_gamma() {}\npub fn widget_delta() {}\n",
6194 )
6195 .unwrap();
6196 let config = source_config(root.clone(), Language::Rust);
6197 let db = IndexDatabase::rebuild(&config).unwrap();
6198
6199 let selector = crate::query::symbol::SymbolSelector {
6200 logical_symbol_id: None,
6201 symbol_id: None,
6202 symbol_path: None,
6203 symbol: Some("widget_alpha".to_string()),
6204 language: Some(Language::Rust),
6205 allow_ambiguous: false,
6206 limit: 10,
6207 };
6208 let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("symbol");
6209 let report = db
6210 .impact_surface_report_for_selected_symbol(
6211 &symbol,
6212 50,
6213 &crate::query::impact::ImpactSurfaceOptions::default(),
6214 )
6215 .unwrap();
6216
6217 for section in [
6218 &report.text_fallback_hits,
6219 &report.tests_touching_symbol_path,
6220 &report.docs_mentioning_symbol_path,
6221 ] {
6222 let total = section.len();
6223 let mut paths: Vec<&str> = section.iter().map(|item| item.path.as_str()).collect();
6224 paths.sort_unstable();
6225 paths.dedup();
6226 assert_eq!(paths.len(), total, "section must have one row per file: {section:?}");
6227
6228 for item in section {
6231 if item.evidence.iter().any(|evidence| evidence.starts_with("path match")) {
6232 assert!(item.symbol.is_none(), "path match must not name a symbol: {item:?}");
6233 }
6234 }
6235 }
6236
6237 let store_rows = report
6238 .text_fallback_hits
6239 .iter()
6240 .filter(|item| item.path.ends_with("widget_store.rs"))
6241 .count();
6242 assert_eq!(store_rows, 1, "a file with four symbols collapses to one fallback row");
6243
6244 fs::remove_dir_all(root).unwrap();
6245 }
6246
6247 #[test]
6248 fn docs_for_symbol_prefers_local_source_context_before_broad_markdown() {
6249 let root = unique_temp_root();
6250 let _ = fs::remove_dir_all(&root);
6251 fs::create_dir_all(root.join("src/runtime")).unwrap();
6252 fs::create_dir_all(root.join("docs")).unwrap();
6253 fs::write(
6254 root.join("src/runtime/task_spawn.rs"),
6255 r#"
6256pub fn spawn_blocking<F, T>(f: F) -> T
6257where
6258 F: FnOnce() -> T + Send + 'static,
6259 T: Send + 'static,
6260{
6261 f()
6262}
6263"#,
6264 )
6265 .unwrap();
6266 fs::write(
6267 root.join("docs/phrase-persistence.md"),
6268 "# Phrase persistence\nUnrelated notes mention spawn_blocking in passing.\n",
6269 )
6270 .unwrap();
6271 fs::write(
6272 root.join("docs/task_spawn.md"),
6273 "# task_spawn\nLocal task_spawn notes explain spawn_blocking.\n",
6274 )
6275 .unwrap();
6276 let config = Config {
6277 root: root.clone(),
6278 database: root.join(".rag-rat/index.sqlite"),
6279 targets: vec![
6280 ResolvedTarget {
6281 name: "rust".to_string(),
6282 language: Language::Rust,
6283 directories: vec![PathBuf::from("src")],
6284 include: vec!["src/".to_string()],
6285 exclude: Vec::new(),
6286 kind: TargetKind::Source,
6287 },
6288 ResolvedTarget {
6289 name: "markdown".to_string(),
6290 language: Language::Markdown,
6291 directories: vec![PathBuf::from("docs")],
6292 include: vec!["**/*.md".to_string()],
6293 exclude: Vec::new(),
6294 kind: TargetKind::Docs,
6295 },
6296 ],
6297 local_ai: Default::default(),
6298 watch: Default::default(),
6299 };
6300 let db = IndexDatabase::rebuild(&config).unwrap();
6301 let symbol = db.symbols("spawn_blocking", Some(Language::Rust), 10).unwrap().remove(0);
6302 let hits = db.docs_for_selected_symbol(&symbol, 10).unwrap();
6303 assert_eq!(hits[0].path, "src/runtime/task_spawn.rs", "docs hits: {hits:?}");
6304 let phrase_index = hits.iter().position(|hit| hit.path == "docs/phrase-persistence.md");
6305 let task_spawn_index = hits.iter().position(|hit| hit.path == "docs/task_spawn.md");
6306 assert!(
6307 phrase_index.is_none_or(|phrase| task_spawn_index.is_some_and(|local| local < phrase)),
6308 "path-local task_spawn docs should outrank unrelated phrase docs: {hits:?}"
6309 );
6310
6311 fs::remove_dir_all(root).unwrap();
6312 }
6313
6314 #[test]
6315 fn partial_tree_sitter_trees_still_contribute_valid_symbols_and_edges() {
6316 let root = unique_temp_root();
6317 let _ = fs::remove_dir_all(&root);
6318 fs::create_dir_all(root.join("src")).unwrap();
6319 fs::write(
6320 root.join("src/lib.rs"),
6321 r#"
6322pub fn helper() {}
6323
6324pub fn caller() {
6325 helper();
6326}
6327
6328fn broken( {
6329"#,
6330 )
6331 .unwrap();
6332 let config = source_config(root.clone(), Language::Rust);
6333 let db = IndexDatabase::rebuild(&config).unwrap();
6334
6335 let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
6336 assert!(
6337 symbols.iter().any(|symbol| symbol.name == "caller"),
6338 "caller symbols: {symbols:?}"
6339 );
6340 assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
6341
6342 fs::remove_dir_all(root).unwrap();
6343 }
6344
6345 #[test]
6346 fn receiver_method_calls_do_not_bind_to_same_named_free_functions() {
6347 let root = unique_temp_root();
6348 let _ = fs::remove_dir_all(&root);
6349 fs::create_dir_all(root.join("src")).unwrap();
6350 fs::write(
6351 root.join("src/lib.rs"),
6352 r#"
6353pub fn spawn_blocking() {}
6354
6355pub fn caller(joinset: JoinSet) {
6356 joinset.spawn_blocking();
6357}
6358
6359pub struct JoinSet;
6360"#,
6361 )
6362 .unwrap();
6363 let config = source_config(root.clone(), Language::Rust);
6364 let db = IndexDatabase::rebuild(&config).unwrap();
6365
6366 let edge = db
6367 .storage
6368 .connection()
6369 .query_row(
6370 "
6371 SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution, receiver_hint
6372 FROM edges
6373 WHERE from_name LIKE '%caller'
6374 AND edge_kind = 'calls_name'
6375 AND to_name = 'spawn_blocking'
6376 ",
6377 [],
6378 |row| {
6379 Ok((
6380 row.get::<_, String>(0)?,
6381 row.get::<_, Option<String>>(1)?,
6382 row.get::<_, Option<i64>>(2)?,
6383 row.get::<_, String>(3)?,
6384 row.get::<_, String>(4)?,
6385 row.get::<_, Option<String>>(5)?,
6386 ))
6387 },
6388 )
6389 .unwrap();
6390 assert_eq!(edge.0, "spawn_blocking");
6391 assert_eq!(edge.1.as_deref(), Some("joinset::spawn_blocking"));
6392 assert_eq!(edge.2, None);
6393 assert_eq!(edge.3, "NameOnly");
6394 assert_eq!(edge.4, "unresolved");
6395 assert_eq!(edge.5.as_deref(), Some("joinset"));
6396
6397 fs::remove_dir_all(root).unwrap();
6398 }
6399
6400 #[test]
6401 fn trace_callees_excludes_type_references_by_default() {
6402 let root = unique_temp_root();
6403 let _ = fs::remove_dir_all(&root);
6404 fs::create_dir_all(root.join("src")).unwrap();
6405 fs::write(
6406 root.join("src/lib.rs"),
6407 r#"
6408pub struct JoinError;
6409pub enum Result<T, E> { Ok(T), Err(E) }
6410pub fn helper() {}
6411
6412pub fn spawn_blocking<F, T>(f: F) -> Result<T, JoinError>
6413where
6414 F: FnOnce() -> T + Send + 'static,
6415 T: Send + 'static,
6416{
6417 helper();
6418 tokio::task::spawn_blocking(f)
6419}
6420"#,
6421 )
6422 .unwrap();
6423 let config = source_config(root.clone(), Language::Rust);
6424 let db = IndexDatabase::rebuild(&config).unwrap();
6425
6426 let default_callees = db.trace_callees("spawn_blocking", 20).unwrap();
6427 assert!(
6428 default_callees.iter().any(|edge| {
6429 edge.edge_kind == "calls_name"
6430 && edge.target.as_deref() == Some("helper")
6431 && edge.verified_target_symbol
6432 }),
6433 "default callees: {default_callees:?}"
6434 );
6435 assert!(
6436 default_callees
6437 .iter()
6438 .all(|edge| edge.target_qualified_name.as_deref()
6439 != Some("tokio::task::spawn_blocking")),
6440 "default callees leaked unresolved external call: {default_callees:?}"
6441 );
6442 assert!(
6443 default_callees.iter().all(|edge| edge.edge_kind != "references_type"),
6444 "default callees leaked type refs: {default_callees:?}"
6445 );
6446 assert!(
6447 default_callees.iter().all(|edge| !matches!(
6448 edge.target.as_deref(),
6449 Some("F" | "T" | "Send" | "Result" | "JoinError")
6450 )),
6451 "default callees leaked generic/type targets: {default_callees:?}"
6452 );
6453
6454 let with_refs = db
6455 .trace_callees_with_options(
6456 "spawn_blocking",
6457 20,
6458 &crate::query::graph::GraphTraversalOptions {
6459 include_references: true,
6460 edge_kinds: None,
6461 ..Default::default()
6462 },
6463 )
6464 .unwrap();
6465 assert!(
6466 with_refs.iter().any(|edge| edge.edge_kind == "references_type"),
6467 "reference-enabled callees: {with_refs:?}"
6468 );
6469
6470 let with_unresolved = db
6471 .trace_callees_with_options(
6472 "spawn_blocking",
6473 20,
6474 &crate::query::graph::GraphTraversalOptions {
6475 include_unresolved: true,
6476 ..Default::default()
6477 },
6478 )
6479 .unwrap();
6480 assert!(
6481 with_unresolved
6482 .iter()
6483 .any(|edge| edge.target_qualified_name.as_deref()
6484 == Some("tokio::task::spawn_blocking")),
6485 "unresolved-enabled callees: {with_unresolved:?}"
6486 );
6487
6488 fs::remove_dir_all(root).unwrap();
6489 }
6490
6491 #[test]
6492 fn trace_callees_defaults_to_repo_relevant_calls() {
6493 let root = unique_temp_root();
6494 let _ = fs::remove_dir_all(&root);
6495 fs::create_dir_all(root.join("src")).unwrap();
6496 fs::write(
6497 root.join("src/lib.rs"),
6498 r#"
6499pub fn repo_helper() {}
6500
6501pub fn caller(input: Result<String, String>) -> String {
6502 repo_helper();
6503 let values: Vec<String> = Vec::new();
6504 let _ = input.map_err(|error| error.to_string());
6505 let _ = Some("value").unwrap_or_else(|| "fallback");
6506 let _ = format!("hello");
6507 values.get(0).unwrap_or_else(|| "fallback").to_string()
6508}
6509"#,
6510 )
6511 .unwrap();
6512 let config = source_config(root.clone(), Language::Rust);
6513 let db = IndexDatabase::rebuild(&config).unwrap();
6514
6515 let default_callees = db.trace_callees("caller", 20).unwrap();
6516 assert!(
6517 default_callees.iter().any(|edge| edge.target.as_deref() == Some("repo_helper")),
6518 "default callees should keep repo-local calls: {default_callees:?}"
6519 );
6520 assert!(
6521 default_callees.iter().all(|edge| {
6522 edge.edge_kind != "uses_macro"
6523 && !matches!(
6524 edge.target.as_deref(),
6525 Some("new" | "map_err" | "unwrap_or_else" | "to_string" | "format")
6526 )
6527 }),
6528 "default callees leaked low-signal calls: {default_callees:?}"
6529 );
6530
6531 let expanded = db
6532 .trace_callees_with_options(
6533 "caller",
6534 20,
6535 &crate::query::graph::GraphTraversalOptions {
6536 include_unresolved: true,
6537 include_macros: true,
6538 include_common_methods: true,
6539 ..Default::default()
6540 },
6541 )
6542 .unwrap();
6543 assert!(
6544 expanded.iter().any(|edge| edge.edge_kind == "uses_macro"),
6545 "macro-enabled callees: {expanded:?}"
6546 );
6547 assert!(
6548 expanded.iter().any(|edge| edge.target.as_deref() == Some("unwrap_or_else")),
6549 "common-method-enabled callees: {expanded:?}"
6550 );
6551
6552 fs::remove_dir_all(root).unwrap();
6553 }
6554
6555 #[test]
6556 fn indexes_kotlin_graph_edges_from_tree_sitter() {
6557 let root = unique_temp_root();
6558 let _ = fs::remove_dir_all(&root);
6559 fs::create_dir_all(root.join("src")).unwrap();
6560 fs::write(
6561 root.join("src/Main.kt"),
6562 r#"
6563package dev.cq27.test
6564
6565import dev.cq27.lib.ExternalThing
6566
6567interface Syncable
6568
6569class MainBridge : Syncable {
6570 suspend fun syncOnce() {
6571 helper()
6572 ExternalThing()
6573 }
6574}
6575
6576fun helper() {}
6577"#,
6578 )
6579 .unwrap();
6580 let config = source_config(root.clone(), Language::Kotlin);
6581 let db = IndexDatabase::rebuild(&config).unwrap();
6582
6583 assert_edge(&db, "syncOnce", "helper", "calls_name", "Syntactic");
6584 assert_edge(&db, "MainBridge", "Syncable", "implements", "Syntactic");
6585 assert_edge(&db, "src/Main.kt", "ExternalThing", "imports", "NameOnly");
6586 let impact = db.impact_surface("helper", 10).unwrap();
6587 assert!(
6588 impact.iter().any(|item| {
6589 item.category == "Direct structural impact" && item.reason == "direct_caller"
6590 }),
6591 "impact: {impact:?}"
6592 );
6593
6594 fs::remove_dir_all(root).unwrap();
6595 }
6596
6597 #[test]
6598 fn indexes_real_world_kotlin_graph_patterns() {
6599 let root = fixture_temp_root("graph-realworld/kotlin");
6600 let config = source_config(root.clone(), Language::Kotlin);
6601 let db = IndexDatabase::rebuild(&config).unwrap();
6602
6603 assert_edge(&db, "src/Main.kt", "ExternalFactory", "imports", "NameOnly");
6604 assert_edge(&db, "Worker", "companion", "contains", "Exact");
6605 assert_edge(&db, "companion", "create", "contains", "Exact");
6606 assert_edge(&db, "syncOnce", "create", "calls_name", "Syntactic");
6607 assert_edge(&db, "syncOnce", "Worker", "references_type", "Syntactic");
6608 assert_edge(&db, "syncOnce", "run", "calls_name", "Syntactic");
6609 assert_edge(&db, "syncOnce", "SingletonRunner", "references_type", "Syntactic");
6610 assert_edge(&db, "syncOnce", "ExternalFactory", "calls_name", "NameOnly");
6611 assert_edge(&db, "syncOnce", "ExternalFactory", "references_type", "NameOnly");
6612 assert_edge(&db, "syncOnce", "cleaned", "calls_name", "Syntactic");
6613 let callers = db.find_callers("cleaned", 10).unwrap();
6614 assert!(
6615 callers.iter().any(|edge| {
6616 edge.edge_kind == "calls_name"
6617 && edge.edge_confidence == edge.confidence
6618 && edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("syncOnce"))
6619 }),
6620 "cleaned callers: {callers:?}"
6621 );
6622
6623 fs::remove_dir_all(root).unwrap();
6624 }
6625
6626 #[test]
6627 fn kotlin_caller_lookup_respects_qualified_receivers_for_common_method_names() {
6628 let root = unique_temp_root();
6629 let _ = fs::remove_dir_all(&root);
6630 fs::create_dir_all(root.join("src")).unwrap();
6631 fs::write(
6632 root.join("src/Main.kt"),
6633 r#"
6634package dev.cq27.test
6635
6636object WatchProposalBuilder {
6637 fun build(): String = "proposal"
6638}
6639
6640class AndroidDialogBuilder {
6641 fun build(): String = "dialog"
6642}
6643
6644fun actualCaller() {
6645 WatchProposalBuilder.build()
6646}
6647
6648fun unrelatedBuilderCalls(dialog: AndroidDialogBuilder) {
6649 dialog.build()
6650 AndroidDialogBuilder().build()
6651}
6652"#,
6653 )
6654 .unwrap();
6655 let config = source_config(root.clone(), Language::Kotlin);
6656 let db = IndexDatabase::rebuild(&config).unwrap();
6657 let target = db
6658 .symbols("build", Some(Language::Kotlin), 10)
6659 .unwrap()
6660 .into_iter()
6661 .find(|symbol| symbol.qualified_name.contains("WatchProposalBuilder"))
6662 .expect("WatchProposalBuilder.build symbol");
6663 let callers = db
6664 .find_callers_with_options(
6665 "build",
6666 20,
6667 &crate::query::graph::GraphTraversalOptions {
6668 resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
6669 symbol_id: Some(target.symbol_id),
6670 ..Default::default()
6671 },
6672 )
6673 .unwrap();
6674 assert_eq!(
6675 callers
6676 .iter()
6677 .filter(|edge| edge
6678 .from_symbol
6679 .as_deref()
6680 .is_some_and(|name| name.ends_with("actualCaller")))
6681 .count(),
6682 1,
6683 "actual caller should be present once: {callers:?}"
6684 );
6685 assert!(
6686 callers.iter().all(|edge| edge
6687 .from_symbol
6688 .as_deref()
6689 .is_none_or(|name| !name.ends_with("unrelatedBuilderCalls"))),
6690 "unrelated builder calls should not resolve to WatchProposalBuilder.build: {callers:?}"
6691 );
6692
6693 fs::remove_dir_all(root).unwrap();
6694 }
6695
6696 #[test]
6697 fn github_sync_caches_papertrail_and_rationale_without_query_time_crawling() {
6698 let (root, config) =
6699 markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
6700 let db = IndexDatabase::rebuild(&config).unwrap();
6701 let mock = MockGitHubClient;
6702
6703 let offline =
6704 github::sync_from_refs::<MockGitHubClient>(db.storage.connection(), &root, None, true)
6705 .unwrap();
6706 assert!(offline.offline);
6707 assert_eq!(offline.discovered_refs, 1);
6708 assert_eq!(offline.synced_items, 0);
6709
6710 let report =
6711 github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6712 assert!(!report.offline);
6713 assert_eq!(report.discovered_refs, 1);
6714 assert_eq!(report.synced_items, 5);
6715 assert_eq!(report.status.issues, 1);
6716 assert_eq!(report.status.comments, 1);
6717 assert_eq!(report.status.pulls, 1);
6718 assert_eq!(report.status.reviews, 1);
6719 assert_eq!(report.status.review_comments, 1);
6720
6721 let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
6722 assert_eq!(issue_hits.len(), 1);
6723 assert_eq!(issue_hits[0].classification, "decision");
6724 assert_eq!(issue_hits[0].evidence_kind, "historical_github");
6725
6726 let refs = db.github_refs_for_path("docs/search.md", 10).unwrap();
6727 assert_eq!(refs.len(), 1);
6728 assert_eq!(refs[0].source_kind, "file");
6729
6730 let rationale = db.rationale_search("risk", 10).unwrap();
6731 assert!(rationale.iter().any(|item| item.classification == "risk"));
6732 let issue_ref_rationale = db.rationale_search("Fixes #42", 10).unwrap();
6733 assert_eq!(issue_ref_rationale.first().map(|item| item.number), Some(42));
6734 assert_eq!(
6735 issue_ref_rationale.first().map(|item| item.evidence_kind),
6736 Some("literal_github_ref")
6737 );
6738 assert_eq!(issue_ref_rationale.first().map(|item| item.score), Some(1.0));
6739 assert!(
6740 issue_ref_rationale.iter().any(|item| item.number == 42),
6741 "issue ref rationale should use structured GitHub refs: {issue_ref_rationale:?}"
6742 );
6743
6744 let chunk_id = first_chunk_id(&db);
6745 let papertrail = db.papertrail_for_chunk(chunk_id, 10).unwrap().unwrap();
6746 assert!(papertrail.current_source.is_some());
6747 assert!(!papertrail.github_evidence.is_empty());
6748 assert!(papertrail.github_evidence.iter().all(|item| {
6749 matches!(item.evidence_kind, "historical_github" | "literal_github_ref")
6750 }));
6751
6752 fs::remove_dir_all(root).unwrap();
6753 }
6754
6755 #[test]
6756 fn papertrail_for_commit_prefers_commit_sourced_github_refs() {
6757 let root = unique_temp_root();
6758 let _ = fs::remove_dir_all(&root);
6759 fs::create_dir_all(root.join("docs")).unwrap();
6760 run_git(&root, &["init"]);
6761 run_git(&root, &["config", "user.name", "Rag Rat"]);
6762 run_git(&root, &["config", "user.email", "rag@example.com"]);
6763 fs::write(root.join("docs/search.md"), "# Decision\nalpha\n").unwrap();
6764 run_git(&root, &["add", "."]);
6765 run_git(&root, &["commit", "-m", "Fix search rationale", "-m", "Fixes #42"]);
6766
6767 let config = markdown_config_for_root(root.clone());
6768 let db = IndexDatabase::rebuild(&config).unwrap();
6769 let commit = db
6770 .storage
6771 .connection()
6772 .query_row("SELECT hash FROM git_commits LIMIT 1", [], |row| row.get::<_, String>(0))
6773 .unwrap();
6774 let mock = MockGitHubClient;
6775 github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6776
6777 let papertrail = db.papertrail_for_commit(&commit[..7], 10).unwrap();
6778 assert_eq!(papertrail.github_evidence.first().map(|item| item.number), Some(42));
6779 assert_eq!(
6780 papertrail.github_evidence.first().map(|item| item.evidence_kind),
6781 Some("literal_github_ref")
6782 );
6783 assert!(
6784 papertrail.fallback_github_evidence.is_empty(),
6785 "structured commit refs should suppress noisy fallback evidence: {papertrail:?}"
6786 );
6787
6788 fs::remove_dir_all(root).unwrap();
6789 }
6790
6791 #[test]
6792 fn papertrail_for_symbol_dedupes_duplicate_file_refs() {
6793 let root = unique_temp_root();
6794 let _ = fs::remove_dir_all(&root);
6795 fs::create_dir_all(root.join("src")).unwrap();
6796 fs::write(
6797 root.join("src/lib.rs"),
6798 "// First rationale (#42)\n// Second rationale (#42)\npub fn tracked_symbol() {}\n",
6799 )
6800 .unwrap();
6801 let config = source_config(root.clone(), Language::Rust);
6802 let db = IndexDatabase::rebuild(&config).unwrap();
6803 let mock = MockGitHubClient;
6804 github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6805 let papertrail = db
6806 .papertrail_for_symbol("tracked_symbol", Some(Language::Rust), 10)
6807 .unwrap()
6808 .expect("tracked symbol papertrail");
6809
6810 assert_eq!(
6811 papertrail
6812 .github_evidence
6813 .iter()
6814 .filter(|item| item.number == 42 && item.item_kind == "issue")
6815 .count(),
6816 1,
6817 "duplicate #42 refs in one file should collapse to one issue evidence row: {papertrail:?}"
6818 );
6819
6820 fs::remove_dir_all(root).unwrap();
6821 }
6822
6823 #[test]
6824 fn github_sync_keeps_partial_cache_and_skips_synced_refs_after_404() {
6825 let (root, config) = markdown_config(
6826 "# Decision\nRefs cq27-dev/rag-rat#42 and cq27-dev/rag-rat#404\nwe will keep sqlite\n",
6827 );
6828 let db = IndexDatabase::rebuild(&config).unwrap();
6829 let mock = PartiallyFailingGitHubClient;
6830
6831 let report =
6832 github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6833 assert_eq!(report.discovered_refs, 2);
6834 assert_eq!(report.synced_items, 5);
6835 assert_eq!(report.failed_refs, 1);
6836 assert_eq!(report.errors.len(), 1);
6837 assert_eq!(report.errors[0].number, 404);
6838 assert_eq!(report.errors[0].status, "not_found");
6839
6840 let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
6841 assert_eq!(issue_hits.len(), 1);
6842 assert_eq!(issue_hits[0].number, 42);
6843
6844 let second =
6845 github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
6846 assert_eq!(second.synced_items, 0);
6847 assert_eq!(second.skipped_refs, 2);
6848 assert_eq!(second.failed_refs, 0);
6849
6850 fs::remove_dir_all(root).unwrap();
6851 }
6852
6853 #[test]
6854 fn search_recovers_when_fts_is_marked_dirty() {
6855 let (root, config) = markdown_config("alpha token");
6856 let db = IndexDatabase::rebuild(&config).unwrap();
6857 db.mark_fts_dirty().unwrap();
6858
6859 let dirty = db.status(&config.database).unwrap();
6860 assert!(dirty.fts_dirty);
6861 assert!(!dirty.fts_fresh);
6862
6863 let hits = db.search("alpha", 10, false).unwrap();
6864 assert_eq!(hits.len(), 1);
6865 assert_eq!(hits[0].summary, "alpha token");
6866 let fresh = db.status(&config.database).unwrap();
6867 assert!(!fresh.fts_dirty);
6868 assert!(fresh.fts_fresh);
6869
6870 fs::remove_dir_all(root).unwrap();
6871 }
6872
6873 #[test]
6874 fn read_chunk_relocates_small_line_drift_to_current_text() {
6875 let (root, config) = markdown_config("# Title\nalpha token\n");
6876 let db = IndexDatabase::rebuild(&config).unwrap();
6877 let chunk_id = first_chunk_id(&db);
6878 fs::write(root.join("docs/search.md"), "inserted\n# Title\nalpha token\n").unwrap();
6879
6880 let chunk = db.read_chunk(chunk_id).unwrap().unwrap();
6881 assert_eq!(chunk.start_line, 2);
6882 assert_eq!(chunk.end_line, 3);
6883 assert_eq!(chunk.text, "# Title\nalpha token\n");
6884
6885 fs::remove_dir_all(root).unwrap();
6886 }
6887
6888 #[test]
6889 fn read_chunk_large_drift_reindexes_and_reports_stale_chunk() {
6890 let (root, config) = markdown_config("# Title\nalpha token\n");
6891 let db = IndexDatabase::rebuild(&config).unwrap();
6892 let chunk_id = first_chunk_id(&db);
6893 fs::write(root.join("docs/search.md"), "# Replacement\nbeta token\n").unwrap();
6894
6895 let err = db.read_chunk(chunk_id).unwrap_err().to_string();
6896 assert!(err.contains("StaleChunk"), "{err}");
6897 let hits = db.search("beta", 10, false).unwrap();
6898 assert_eq!(hits.len(), 1);
6899 assert!(db.search("alpha", 10, false).unwrap().is_empty());
6900
6901 fs::remove_dir_all(root).unwrap();
6902 }
6903
6904 #[test]
6905 fn search_retries_after_healing_stale_hit() {
6906 let (root, config) = markdown_config("# Title\nalpha token\n");
6907 let db = IndexDatabase::rebuild(&config).unwrap();
6908 fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
6909
6910 let hits = db.search("alpha", 10, false).unwrap();
6911 assert!(hits.is_empty());
6912 let beta_hits = db.search("beta", 10, false).unwrap();
6913 assert_eq!(beta_hits.len(), 1);
6914 assert!(beta_hits[0].summary.contains("beta"));
6915
6916 fs::remove_dir_all(root).unwrap();
6917 }
6918
6919 #[test]
6920 fn search_heals_relocated_hits_before_returning_line_spans() {
6921 let (root, config) = markdown_config("# Title\nalpha token\n");
6922 let db = IndexDatabase::rebuild(&config).unwrap();
6923 fs::write(root.join("docs/search.md"), "inserted\n# Title\nalpha token\n").unwrap();
6924
6925 let hits = db.search("alpha", 10, false).unwrap();
6926 assert_eq!(hits.len(), 1);
6927 assert_eq!(hits[0].start_line, 2);
6928 assert_eq!(hits[0].end_line, 3);
6929 assert!(hits[0].summary.contains("alpha"));
6930
6931 fs::remove_dir_all(root).unwrap();
6932 }
6933
/// Deleting the source file must make `read_chunk` fail with an error mentioning `Gone`, and
/// a later search for the old token must come back empty.
#[test]
fn read_chunk_deleted_source_reports_gone() {
    let (workspace, config) = markdown_config("# Title\nalpha token\n");
    let index = IndexDatabase::rebuild(&config).unwrap();
    let orphaned_chunk = first_chunk_id(&index);
    let document = workspace.join("docs/search.md");
    fs::remove_file(document).unwrap();

    let message = index.read_chunk(orphaned_chunk).unwrap_err().to_string();
    assert!(message.contains("Gone"), "{message}");

    let remaining_hits = index.search("alpha", 10, false).unwrap();
    assert!(remaining_hits.is_empty());

    fs::remove_dir_all(workspace).unwrap();
}
6947
/// Indexes one more document than `MAX_AUTO_HEAL_FILES_PER_CALL`, rewrites all of them, and
/// expects the next search to fail with an error mentioning `needs_reindex`.
#[test]
fn search_returns_needs_reindex_when_heal_cap_is_exceeded() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let docs = root.join("docs");
    fs::create_dir_all(&docs).unwrap();

    // The inclusive range yields cap + 1 documents.
    let doc_paths: Vec<PathBuf> = (0..=MAX_AUTO_HEAL_FILES_PER_CALL)
        .map(|index| docs.join(format!("doc-{index}.md")))
        .collect();
    for path in &doc_paths {
        fs::write(path, "common stale token\n").unwrap();
    }

    let config = markdown_config_for_root(root.clone());
    let db = IndexDatabase::rebuild(&config).unwrap();

    for path in &doc_paths {
        fs::write(path, "fresh replacement token\n").unwrap();
    }

    let message = db.search("common", 20, false).unwrap_err().to_string();
    assert!(message.contains("needs_reindex"), "{message}");

    fs::remove_dir_all(root).unwrap();
}
6968
/// With two unchanged documents and a heal limit of one, `heal_index` must report nothing
/// healed, nothing removed, both files skipped, and no message.
#[test]
fn heal_index_limit_does_not_warn_when_only_fresh_files_are_skipped() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let docs = root.join("docs");
    fs::create_dir_all(&docs).unwrap();
    for (file_name, text) in [("one.md", "one fresh token\n"), ("two.md", "two fresh token\n")] {
        fs::write(docs.join(file_name), text).unwrap();
    }
    let config = markdown_config_for_root(root.clone());
    let db = IndexDatabase::rebuild(&config).unwrap();

    let outcome = db.heal_index(Some(1)).unwrap();

    assert_eq!(outcome.healed_files, 0);
    assert_eq!(outcome.removed_files, 0);
    assert_eq!(outcome.skipped_files, 2);
    assert_eq!(outcome.message, None);

    fs::remove_dir_all(root).unwrap();
}
6989
/// Overwrites the `fts_source_revision` meta value, checks that status reports the FTS index
/// as neither dirty nor fresh, then verifies that a search still returns the hit and leaves
/// the FTS revision equal to the content revision.
#[test]
fn search_recovers_when_fts_revision_is_stale() {
    let (workspace, config) = markdown_config("alpha token");
    let index = IndexDatabase::rebuild(&config).unwrap();
    index.set_meta("fts_source_revision", "stale").unwrap();

    let before = index.status(&config.database).unwrap();
    assert!(!before.fts_dirty);
    assert!(!before.fts_fresh);

    let alpha_hits = index.search("alpha", 10, false).unwrap();
    assert_eq!(alpha_hits.len(), 1);

    let after = index.status(&config.database).unwrap();
    assert_eq!(after.fts_source_revision.as_deref(), Some(after.content_revision.as_str()));
    assert!(after.fts_fresh);

    fs::remove_dir_all(workspace).unwrap();
}
7008
/// Indexes a Rust file containing an unterminated function signature and expects status to
/// report exactly one parser failure whose path is `src/broken.rs`.
#[test]
fn parser_failures_report_paths() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).unwrap();
    fs::write(src_dir.join("broken.rs"), "pub fn broken(").unwrap();

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.rs".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target],
        local_ai: Default::default(),
        watch: Default::default(),
    };

    let db = IndexDatabase::rebuild(&config).unwrap();
    let status = db.status(&config.database).unwrap();
    assert_eq!(status.parser_failures, 1);
    let first_failure = &status.parser_failure_paths[0];
    assert_eq!(first_failure.path, "src/broken.rs");

    fs::remove_dir_all(root).unwrap();
}
7038
/// A memory bound to a logical symbol must be returned by `memory_for_symbol`, be attached to
/// the bound chunk returned by `read_chunk`, and be listed as a direct memory (active, not
/// stale) in the impact surface report.
#[test]
fn repo_memory_bound_to_logical_symbol_surfaces_in_symbol_chunk_and_impact() {
    use crate::query::memory::{RepoMemoryBindTarget, RepoMemoryCreate};

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    // Two cfg-gated bodies that share one name.
    let source = "#[cfg(unix)]\npub fn cfg_helper() {}\n#[cfg(windows)]\npub fn cfg_helper() {}\n";
    fs::write(root.join("src/lib.rs"), source).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("cfg_helper".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: true,
        limit: 10,
    };
    let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("selected symbol");
    let logical_symbol_id = symbol.logical_symbol_id.expect("logical symbol id");

    let bind = RepoMemoryBindTarget {
        logical_symbol_id: Some(logical_symbol_id),
        symbol_id: None,
        chunk_id: None,
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let request = RepoMemoryCreate {
        kind: "Invariant".to_string(),
        title: "Treat cfg helper variants as one logical helper".to_string(),
        body: "Caller and impact analysis should use the logical symbol, not one cfg body variant."
            .to_string(),
        confidence: "high".to_string(),
        created_by: Some("test-agent".to_string()),
        source: Some("agent".to_string()),
        tags: vec!["cfg".to_string(), "graph".to_string()],
        bind,
    };
    let created = db.memory_create(request).unwrap();
    assert!(!created.duplicate);
    assert_eq!(created.memory.bindings[0].binding_kind, "logical_symbol");

    let symbol_memories = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(symbol_memories.len(), 1);
    let memory = &symbol_memories[0];
    assert_eq!(memory.kind, "Invariant");

    let bound_chunk_id = memory.bindings[0].chunk_id.expect("bound chunk");
    let bound_chunk = db.read_chunk(bound_chunk_id).unwrap().expect("memory chunk");
    assert_eq!(bound_chunk.memories.len(), 1);
    assert_eq!(bound_chunk.memories[0].memory_id, created.memory.memory_id);

    let impact_options = crate::query::impact::ImpactSurfaceOptions::default();
    let impact =
        db.impact_surface_report_for_selected_symbol(&symbol, 10, &impact_options).unwrap();
    assert_eq!(impact.repo_memories.direct.len(), 1);
    let memory_status = &impact.completeness_and_caveats.memory_status;
    assert_eq!(memory_status.active, 1);
    assert_eq!(memory_status.stale, 0);

    fs::remove_dir_all(root).unwrap();
}
7119
/// Binds a memory to `keystone`, moves the function down by inserting code above it, rebuilds
/// the index, and checks that the memory still exists, is still returned for the symbol, and
/// does not carry the `gone` anchor status after validation.
#[test]
fn repo_memory_survives_reindex_and_relocates_when_symbol_moves() {
    use crate::query::memory::{RepoMemoryBindTarget, RepoMemoryCreate};

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    let lib_rs = root.join("src/lib.rs");
    fs::write(&lib_rs, "pub fn keystone() {}\n").unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let initial_db = IndexDatabase::rebuild(&config).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("keystone".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: false,
        limit: 10,
    };
    let original_symbol =
        initial_db.select_symbol(&selector).unwrap().unwrap().expect("symbol");

    let bind = RepoMemoryBindTarget {
        symbol_id: Some(original_symbol.symbol_id),
        logical_symbol_id: None,
        chunk_id: None,
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let created = initial_db
        .memory_create(RepoMemoryCreate {
            kind: "Invariant".to_string(),
            title: "keystone holds an invariant".to_string(),
            body: "This memory must survive a reindex and follow the symbol when it moves."
                .to_string(),
            confidence: "high".to_string(),
            created_by: Some("test".to_string()),
            source: Some("agent".to_string()),
            tags: Vec::new(),
            bind,
        })
        .unwrap();

    // Push `keystone` down two lines, then rebuild from scratch.
    fs::write(&lib_rs, "pub fn added_above() {}\n\npub fn keystone() {}\n").unwrap();
    let reindexed_db = IndexDatabase::rebuild(&config).unwrap();

    let surviving = crate::query::memory::memory_by_id(
        reindexed_db.storage.connection(),
        &created.memory.memory_id,
    )
    .unwrap();
    assert!(surviving.is_some(), "memory was lost to reindex");

    reindexed_db.memory_validate().unwrap();
    let moved_symbol =
        reindexed_db.select_symbol(&selector).unwrap().unwrap().expect("symbol after move");
    let anchored = reindexed_db.memory_for_symbol(&moved_symbol, 10).unwrap();
    assert_eq!(anchored.len(), 1, "memory did not re-anchor to moved symbol");
    assert_ne!(anchored[0].bindings[0].anchor_status, "gone");

    fs::remove_dir_all(root).unwrap();
}
7194
/// Binds a memory to a chunk, then tampers with the `chunks` table directly: changing the
/// chunk's `text_hash` must make validation report one stale memory, and deleting the chunk
/// row must make it report one gone memory. The binding's anchor status must follow.
#[test]
fn repo_memory_validate_marks_changed_or_missing_anchors_non_current() {
    use crate::query::memory::{RepoMemoryBindTarget, RepoMemoryCreate};

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "pub fn anchored_memory() {}\n").unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("anchored_memory".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: false,
        limit: 10,
    };
    let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("selected symbol");

    // Look up the chunk that holds the selected symbol.
    let chunk_lookup = "
        SELECT chunks.id
        FROM chunks
        JOIN files ON files.id = chunks.file_id
        WHERE files.path = ?1 AND chunks.symbol_path = ?2
        LIMIT 1
        ";
    let chunk_id = db
        .storage
        .connection()
        .query_row(chunk_lookup, params![symbol.path, symbol.qualified_name], |row| {
            row.get::<_, i64>(0)
        })
        .unwrap();

    let bind = RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: Some(chunk_id),
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let created = db
        .memory_create(RepoMemoryCreate {
            kind: "Risk".to_string(),
            title: "Anchor must become stale when source hash changes".to_string(),
            body: "Validation should separate stale memories from current repo evidence."
                .to_string(),
            confidence: "medium".to_string(),
            created_by: Some("test-agent".to_string()),
            source: Some("agent".to_string()),
            tags: Vec::new(),
            bind,
        })
        .unwrap();

    // Step 1: a changed text hash makes the anchor stale.
    db.storage
        .connection()
        .execute("UPDATE chunks SET text_hash = 'changed' WHERE id = ?1", [chunk_id])
        .unwrap();
    let after_hash_change = db.memory_validate().unwrap();
    assert_eq!(after_hash_change.stale, 1);
    let stale_memories = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(stale_memories[0].memory_id, created.memory.memory_id);
    assert_eq!(stale_memories[0].bindings[0].anchor_status, "stale");

    // Step 2: a deleted chunk row makes the anchor gone.
    db.storage.connection().execute("DELETE FROM chunks WHERE id = ?1", [chunk_id]).unwrap();
    let after_delete = db.memory_validate().unwrap();
    assert_eq!(after_delete.gone, 1);
    let gone_memories = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(gone_memories[0].bindings[0].anchor_status, "gone");

    fs::remove_dir_all(root).unwrap();
}
7279
/// Binds one memory to the `caller_edge -> target_edge` call edge and checks that the impact
/// report for `target_edge` lists it under `path_crossed` rather than `direct`. Then binds a
/// second memory to a call path identified by an edge sequence hash and checks that it can be
/// looked up by that hash.
#[test]
fn repo_memory_bound_to_edge_surfaces_when_impact_crosses_call_path() {
    use crate::query::{
        graph::{GraphResolutionMode, GraphTraversalOptions},
        memory::{RepoMemoryBindTarget, RepoMemoryCreate},
    };

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    let source = "pub fn target_edge() {}\npub fn caller_edge() {\n target_edge();\n}\n";
    fs::write(root.join("src/lib.rs"), source).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("target_edge".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: false,
        limit: 10,
    };
    let target = db.select_symbol(&selector).unwrap().unwrap().expect("selected target");

    // Find the edge id of the first reported caller of `target_edge`.
    let graph_options = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(target.symbol_id),
        logical_symbol_id: target.logical_symbol_id,
        ..Default::default()
    };
    let callers =
        db.graph_traversal_report("find_callers", &target, true, 10, &graph_options).unwrap();
    let edge_id = callers.results[0].edge_id;

    let edge_bind = RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: None,
        edge_id: Some(edge_id),
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let edge_memory = db
        .memory_create(RepoMemoryCreate {
            kind: "Risk".to_string(),
            title: "caller_edge to target_edge must stay synchronous".to_string(),
            body: "This specific call path is used to prove edge-bound memories surface when impact crosses the edge."
                .to_string(),
            confidence: "high".to_string(),
            created_by: Some("test-agent".to_string()),
            source: Some("agent".to_string()),
            tags: vec!["edge".to_string()],
            bind: edge_bind,
        })
        .unwrap();
    let edge_binding = &edge_memory.memory.bindings[0];
    assert_eq!(edge_binding.binding_kind, "edge");
    assert_eq!(edge_binding.edge_id, Some(edge_id));

    let impact_options = crate::query::impact::ImpactSurfaceOptions {
        resolution_mode: GraphResolutionMode::Exact,
        ..Default::default()
    };
    let impact =
        db.impact_surface_report_for_selected_symbol(&target, 10, &impact_options).unwrap();
    assert!(impact.repo_memories.direct.is_empty());
    assert_eq!(impact.repo_memories.path_crossed.len(), 1);
    assert_eq!(impact.repo_memories.path_crossed[0].memory_id, edge_memory.memory.memory_id);
    assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);

    let call_path_bind = RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: None,
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: target.logical_symbol_id,
        end_logical_symbol_id: target.logical_symbol_id,
        edge_sequence_hash: Some("edge-sequence-test-hash".to_string()),
        path_summary: Some("caller_edge -> target_edge".to_string()),
    };
    let call_path_memory = db
        .memory_create(RepoMemoryCreate {
            kind: "TestExpectation".to_string(),
            title: "caller_edge path hash recall".to_string(),
            body: "Call-path memories are addressable by a deterministic edge sequence hash."
                .to_string(),
            confidence: "medium".to_string(),
            created_by: Some("test-agent".to_string()),
            source: Some("agent".to_string()),
            tags: vec!["call-path".to_string()],
            bind: call_path_bind,
        })
        .unwrap();

    let recalled = db.memory_for_call_path_hash("edge-sequence-test-hash", 10).unwrap();
    assert_eq!(recalled.len(), 1);
    assert_eq!(recalled[0].memory_id, call_path_memory.memory.memory_id);
    assert_eq!(recalled[0].call_paths[0].path_summary, "caller_edge -> target_edge");

    fs::remove_dir_all(root).unwrap();
}
7398
/// Builds a git repository in which `src/hot.rs` is committed four times and contains many
/// functions, then checks that both the churn and the god-module repo briefs rank it first.
#[test]
fn repo_brief_ranks_churn_and_god_module_candidates() {
    use crate::query::repo_brief::{RepoBriefMode, RepoBriefOptions};

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    let repo_setup: [&[&str]; 3] = [
        &["init"],
        &["config", "user.name", "Rag Rat"],
        &["config", "user.email", "rag@example.com"],
    ];
    for args in repo_setup {
        run_git(&root, args);
    }

    let hot_path = root.join("src/hot.rs");
    fs::write(root.join("src/stable.rs"), "pub fn stable() -> i32 { 1 }\n").unwrap();
    fs::write(&hot_path, hot_module_text(0)).unwrap();
    run_git(&root, &["add", "."]);
    run_git(&root, &["commit", "-m", "Add initial modules"]);

    // Three further commits that touch only the hot module.
    for revision in 1..=3 {
        fs::write(&hot_path, hot_module_text(revision)).unwrap();
        run_git(&root, &["add", "src/hot.rs"]);
        run_git(&root, &["commit", "-m", "Iterate hot module"]);
    }

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.rs".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target],
        local_ai: Default::default(),
        watch: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Both briefs use the same options apart from the mode.
    let brief_for = |mode: RepoBriefMode| {
        db.repo_brief(RepoBriefOptions {
            mode,
            limit: 1,
            include_generated: false,
            include_memories: true,
        })
        .unwrap()
    };

    let churn = brief_for(RepoBriefMode::Churn);
    let top_churn = &churn.candidates[0];
    assert_eq!(top_churn.path, "src/hot.rs");
    assert_eq!(top_churn.category, "recent_churn_hotspot");
    assert!(top_churn.score <= 1.0);
    assert!(top_churn.metrics.commit_touch_count >= 4);
    assert!(top_churn.why.iter().any(|reason| reason.contains("churn")));

    let god_modules = brief_for(RepoBriefMode::GodModules);
    let top_god_module = &god_modules.candidates[0];
    assert_eq!(top_god_module.path, "src/hot.rs");
    assert!(top_god_module.score <= 1.0);
    assert!(top_god_module.metrics.symbol_count >= 30);
    assert!(!top_god_module.split_hints.is_empty());
    assert!(top_god_module.next_tools.iter().any(|tool| tool.tool == "impact_surface"));

    fs::remove_dir_all(root).unwrap();
}
7467
/// Builds a git repository in which `src/sync/actor.rs` and `src/sync/msg.rs` are committed
/// together several times while `src/ui/app.rs` is committed only once, then checks that
/// `repo_clusters` reports a `src/sync` cluster containing both sync files.
#[test]
fn repo_clusters_groups_cotouched_files() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src/sync")).unwrap();
    fs::create_dir_all(root.join("src/ui")).unwrap();
    run_git(&root, &["init"]);
    run_git(&root, &["config", "user.name", "Rag Rat"]);
    run_git(&root, &["config", "user.email", "rag@example.com"]);

    fs::write(root.join("src/sync/actor.rs"), "pub fn sync_actor() -> i32 { 1 }\n").unwrap();
    fs::write(root.join("src/sync/msg.rs"), "pub fn sync_msg() -> i32 { 2 }\n").unwrap();
    fs::write(root.join("src/ui/app.rs"), "pub fn ui_app() -> i32 { 3 }\n").unwrap();
    run_git(&root, &["add", "."]);
    run_git(&root, &["commit", "-m", "Add modules"]);

    for revision in 1..=2 {
        // Both files must differ from their previous contents on every iteration. Writing the
        // bare `revision` into actor.rs would reproduce the initial `{ 1 }` body on the first
        // pass, so that commit would touch msg.rs alone and not count as a co-touch.
        fs::write(
            root.join("src/sync/actor.rs"),
            format!("pub fn sync_actor() -> i32 {{ {} }}\n", revision + 20),
        )
        .unwrap();
        fs::write(
            root.join("src/sync/msg.rs"),
            format!("pub fn sync_msg() -> i32 {{ {} }}\n", revision + 10),
        )
        .unwrap();
        run_git(&root, &["add", "src/sync/actor.rs", "src/sync/msg.rs"]);
        run_git(&root, &["commit", "-m", "Iterate sync modules"]);
    }

    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![ResolvedTarget {
            name: "rust".to_string(),
            language: Language::Rust,
            directories: vec![PathBuf::from("src")],
            include: vec!["**/*.rs".to_string()],
            exclude: Vec::new(),
            kind: TargetKind::Source,
        }],
        local_ai: Default::default(),
        watch: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    let clusters = db
        .repo_clusters(crate::query::clusters::RepoClustersOptions {
            limit: 5,
            include_generated: false,
            include_memories: true,
            min_cluster_size: 2,
        })
        .unwrap();

    let sync_cluster = clusters
        .clusters
        .iter()
        .find(|cluster| cluster.name == "src/sync")
        .expect("sync cluster");
    assert!(sync_cluster.representative_paths.contains(&"src/sync/actor.rs".to_string()));
    assert!(sync_cluster.representative_paths.contains(&"src/sync/msg.rs".to_string()));
    assert!(sync_cluster.metrics.co_touch_edges >= 2);

    fs::remove_dir_all(root).unwrap();
}
7535
/// Generates Rust source for a deliberately large module: an `entry` function that sums calls
/// to 32 helpers plus `revision`, followed by the 32 one-line helper definitions.
///
/// Only the `revision` literal differs between two outputs.
fn hot_module_text(revision: usize) -> String {
    let call_lines: String = (0..32).map(|i| format!(" helper_{i}() +\n")).collect();
    let helper_defs: String =
        (0..32).map(|i| format!("pub fn helper_{i}() -> i32 {{ {i} }}\n")).collect();
    format!("pub fn entry() -> i32 {{\n{call_lines} {revision}\n}}\n{helper_defs}")
}
7548
7549 fn unique_temp_root() -> PathBuf {
7550 let mut root = std::env::temp_dir();
7551 let suffix = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
7552 root.push(format!("rag-rat-schema-test-{}-{}-{suffix}", std::process::id(), now_ms()));
7553 root
7554 }
7555
7556 fn fixture_temp_root(fixture: &str) -> PathBuf {
7557 let root = unique_temp_root();
7558 let _ = fs::remove_dir_all(&root);
7559 let fixture_root =
7560 PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/fixtures").join(fixture);
7561 copy_fixture_dir(&fixture_root, &root);
7562 root
7563 }
7564
/// Recursively copies the directory tree at `from` into `to`, creating `to` (and any missing
/// parents) first. Panics on any I/O error.
fn copy_fixture_dir(from: &Path, to: &Path) {
    fs::create_dir_all(to).unwrap();
    let entries = fs::read_dir(from).unwrap().map(Result::unwrap);
    for entry in entries {
        let source = entry.path();
        let destination = to.join(entry.file_name());
        if !source.is_dir() {
            fs::copy(&source, &destination).unwrap();
            continue;
        }
        // Directories are mirrored by recursing.
        copy_fixture_dir(&source, &destination);
    }
}
7578
7579 fn markdown_config(text: &str) -> (PathBuf, Config) {
7580 let root = unique_temp_root();
7581 let _ = fs::remove_dir_all(&root);
7582 let docs = root.join("docs");
7583 fs::create_dir_all(&docs).unwrap();
7584 fs::write(docs.join("search.md"), text).unwrap();
7585 let config = markdown_config_for_root(root.clone());
7586 (root, config)
7587 }
7588
7589 fn markdown_config_for_root(root: PathBuf) -> Config {
7590 Config {
7591 root: root.clone(),
7592 database: root.join(".rag-rat/index.sqlite"),
7593 targets: vec![ResolvedTarget {
7594 name: "markdown".to_string(),
7595 language: Language::Markdown,
7596 directories: vec![PathBuf::from("docs")],
7597 include: vec!["**/*.md".to_string()],
7598 exclude: Vec::new(),
7599 kind: TargetKind::Docs,
7600 }],
7601 local_ai: Default::default(),
7602 watch: Default::default(),
7603 }
7604 }
7605
7606 fn source_config(root: PathBuf, language: Language) -> Config {
7607 Config {
7608 root: root.clone(),
7609 database: root.join(".rag-rat/index.sqlite"),
7610 targets: vec![ResolvedTarget {
7611 name: language.as_str().to_string(),
7612 language,
7613 directories: vec![PathBuf::from("src")],
7614 include: vec!["src/".to_string()],
7615 exclude: Vec::new(),
7616 kind: TargetKind::Source,
7617 }],
7618 local_ai: Default::default(),
7619 watch: Default::default(),
7620 }
7621 }
7622
7623 fn assert_edge(db: &IndexDatabase, from: &str, to: &str, edge_kind: &str, confidence: &str) {
7624 let count = db
7625 .storage
7626 .connection()
7627 .query_row(
7628 "
7629 SELECT COUNT(*)
7630 FROM edges
7631 WHERE edge_kind = ?1
7632 AND confidence = ?2
7633 AND COALESCE(from_name, '') LIKE ?3
7634 AND to_name LIKE ?4
7635 ",
7636 params![edge_kind, confidence, format!("%{from}%"), format!("%{to}%")],
7637 |row| row.get::<_, i64>(0),
7638 )
7639 .unwrap();
7640 assert!(count > 0, "missing edge {from} -[{edge_kind}/{confidence}]-> {to}");
7641 }
7642
/// After a rebuild, the connection must report `journal_mode` = WAL and `synchronous` = 1,
/// and the freshly indexed symbol must be queryable.
#[test]
fn rebuild_restores_durable_wal_after_bulk_build() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "pub fn alpha() {}\npub fn beta() {}\n").unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let connection = db.storage.connection();
    let journal_mode: String =
        connection.query_row("PRAGMA journal_mode", [], |row| row.get(0)).unwrap();
    assert_eq!(journal_mode.to_lowercase(), "wal", "rebuild must restore WAL durability");

    let synchronous: i64 =
        connection.query_row("PRAGMA synchronous", [], |row| row.get(0)).unwrap();
    assert_eq!(synchronous, 1, "synchronous must be restored to NORMAL (=1)");

    let alpha_symbols = db.symbols("alpha", Some(Language::Rust), 10).unwrap();
    assert!(!alpha_symbols.is_empty());

    fs::remove_dir_all(root).unwrap();
}
7671
7672 fn table_count(db: &IndexDatabase, table: &str) -> i64 {
7673 db.storage
7674 .connection()
7675 .query_row("SELECT COUNT(*) FROM sqlite_master WHERE name = ?1", [table], |row| {
7676 row.get(0)
7677 })
7678 .unwrap()
7679 }
7680
7681 fn row_count(db: &IndexDatabase, table: &str) -> i64 {
7682 db.storage
7683 .connection()
7684 .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| row.get(0))
7685 .unwrap()
7686 }
7687
7688 fn chunk_columns(db: &IndexDatabase) -> Vec<String> {
7689 table_columns(db, "chunks")
7690 }
7691
7692 fn file_columns(db: &IndexDatabase) -> Vec<String> {
7693 table_columns(db, "files")
7694 }
7695
7696 fn table_columns(db: &IndexDatabase, table: &str) -> Vec<String> {
7697 let mut stmt =
7698 db.storage.connection().prepare(&format!("PRAGMA table_info({table})")).unwrap();
7699 stmt.query_map([], |row| row.get::<_, String>(1)).unwrap().map(Result::unwrap).collect()
7700 }
7701
7702 fn indexed_revision_count(db: &IndexDatabase) -> i64 {
7703 db.storage
7704 .connection()
7705 .query_row("SELECT COUNT(*) FROM files WHERE indexed_revision != ''", [], |row| {
7706 row.get(0)
7707 })
7708 .unwrap()
7709 }
7710
7711 fn chunk_source_revision_count(db: &IndexDatabase) -> i64 {
7712 db.storage
7713 .connection()
7714 .query_row("SELECT COUNT(*) FROM chunks WHERE source_revision != ''", [], |row| {
7715 row.get(0)
7716 })
7717 .unwrap()
7718 }
7719
7720 fn first_chunk_id(db: &IndexDatabase) -> i64 {
7721 db.storage
7722 .connection()
7723 .query_row("SELECT id FROM chunks ORDER BY id LIMIT 1", [], |row| row.get(0))
7724 .unwrap()
7725 }
7726
/// Runs `git <args>` with `root` as the working directory and panics, including the captured
/// stdout and stderr, when the command exits unsuccessfully.
fn run_git(root: &Path, args: &[&str]) {
    let mut command = Command::new("git");
    command.args(args).current_dir(root);
    let output = command.output().unwrap();
    if output.status.success() {
        return;
    }
    panic!(
        "git {:?} failed\nstdout:\n{}\nstderr:\n{}",
        args,
        String::from_utf8_lossy(&output.stdout),
        String::from_utf8_lossy(&output.stderr)
    );
}
7737
7738 struct MockGitHubClient;
7739
7740 impl github::GitHubClient for MockGitHubClient {
7741 fn issue(
7742 &self,
7743 owner: &str,
7744 repo: &str,
7745 number: i64,
7746 ) -> anyhow::Result<github::GitHubIssue> {
7747 Ok(github::GitHubIssue {
7748 owner: owner.to_string(),
7749 repo: repo.to_string(),
7750 number,
7751 html_url: format!("https://github.com/{owner}/{repo}/issues/{number}"),
7752 state: "open".to_string(),
7753 title: "Decision: keep sqlite".to_string(),
7754 body: "We decided sqlite is required for binary size.".to_string(),
7755 author: Some("octo".to_string()),
7756 created_at: Some("2026-01-01T00:00:00Z".to_string()),
7757 updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7758 is_pull_request: true,
7759 })
7760 }
7761
7762 fn issue_comments(
7763 &self,
7764 owner: &str,
7765 repo: &str,
7766 number: i64,
7767 ) -> anyhow::Result<Vec<github::GitHubComment>> {
7768 Ok(vec![github::GitHubComment {
7769 id: 4201,
7770 owner: owner.to_string(),
7771 repo: repo.to_string(),
7772 number,
7773 html_url: format!("https://github.com/{owner}/{repo}/issues/{number}#comment-1"),
7774 body: "Rejected alternative: duckdb was too large.".to_string(),
7775 author: Some("octo".to_string()),
7776 created_at: Some("2026-01-01T01:00:00Z".to_string()),
7777 updated_at: Some("2026-01-01T01:00:00Z".to_string()),
7778 }])
7779 }
7780
7781 fn pull(
7782 &self,
7783 owner: &str,
7784 repo: &str,
7785 number: i64,
7786 ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7787 Ok(Some(github::GitHubPullRequest {
7788 owner: owner.to_string(),
7789 repo: repo.to_string(),
7790 number,
7791 html_url: format!("https://github.com/{owner}/{repo}/pull/{number}"),
7792 state: "open".to_string(),
7793 title: "Use sqlite".to_string(),
7794 body: "Constraint: normal queries must use cache only.".to_string(),
7795 author: Some("octo".to_string()),
7796 created_at: Some("2026-01-01T00:00:00Z".to_string()),
7797 updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7798 merged_at: None,
7799 }))
7800 }
7801
7802 fn pull_reviews(
7803 &self,
7804 owner: &str,
7805 repo: &str,
7806 number: i64,
7807 ) -> anyhow::Result<Vec<github::GitHubReview>> {
7808 Ok(vec![github::GitHubReview {
7809 id: 4202,
7810 owner: owner.to_string(),
7811 repo: repo.to_string(),
7812 number,
7813 html_url: Some(format!("https://github.com/{owner}/{repo}/pull/{number}#review")),
7814 state: "COMMENTED".to_string(),
7815 body: "Risk: live crawling during search would be surprising.".to_string(),
7816 author: Some("reviewer".to_string()),
7817 submitted_at: Some("2026-01-01T02:00:00Z".to_string()),
7818 }])
7819 }
7820
7821 fn pull_review_comments(
7822 &self,
7823 owner: &str,
7824 repo: &str,
7825 number: i64,
7826 ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7827 Ok(vec![github::GitHubReviewComment {
7828 id: 4203,
7829 owner: owner.to_string(),
7830 repo: repo.to_string(),
7831 number,
7832 path: Some("docs/search.md".to_string()),
7833 html_url: format!("https://github.com/{owner}/{repo}/pull/{number}#discussion"),
7834 body: "No longer use obsolete duckdb rationale.".to_string(),
7835 author: Some("reviewer".to_string()),
7836 created_at: Some("2026-01-01T03:00:00Z".to_string()),
7837 updated_at: Some("2026-01-01T03:00:00Z".to_string()),
7838 }])
7839 }
7840 }
7841
7842 struct PartiallyFailingGitHubClient;
7843
7844 impl github::GitHubClient for PartiallyFailingGitHubClient {
7845 fn issue(
7846 &self,
7847 owner: &str,
7848 repo: &str,
7849 number: i64,
7850 ) -> anyhow::Result<github::GitHubIssue> {
7851 if number == 404 {
7852 anyhow::bail!("gh: Not Found (HTTP 404)");
7853 }
7854 MockGitHubClient.issue(owner, repo, number)
7855 }
7856
7857 fn issue_comments(
7858 &self,
7859 owner: &str,
7860 repo: &str,
7861 number: i64,
7862 ) -> anyhow::Result<Vec<github::GitHubComment>> {
7863 MockGitHubClient.issue_comments(owner, repo, number)
7864 }
7865
7866 fn pull(
7867 &self,
7868 owner: &str,
7869 repo: &str,
7870 number: i64,
7871 ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7872 MockGitHubClient.pull(owner, repo, number)
7873 }
7874
7875 fn pull_reviews(
7876 &self,
7877 owner: &str,
7878 repo: &str,
7879 number: i64,
7880 ) -> anyhow::Result<Vec<github::GitHubReview>> {
7881 MockGitHubClient.pull_reviews(owner, repo, number)
7882 }
7883
7884 fn pull_review_comments(
7885 &self,
7886 owner: &str,
7887 repo: &str,
7888 number: i64,
7889 ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7890 MockGitHubClient.pull_review_comments(owner, repo, number)
7891 }
7892 }
7893}