1pub mod ai;
2pub mod anchors;
3pub mod chunker;
4pub mod edges;
5pub mod git_history;
6pub mod github;
7pub mod parser;
8pub mod schema;
9pub mod symbols;
10pub mod walker;
11
12#[cfg(test)]
13mod anchor_tests;
14#[cfg(test)]
15mod parser_tests;
16
17use std::{
18 collections::{BTreeMap, BTreeSet},
19 fs,
20 path::{Path, PathBuf},
21 process::Command,
22 sync::{
23 atomic::{AtomicUsize, Ordering},
24 mpsc,
25 },
26 thread,
27 thread::JoinHandle,
28 time::{SystemTime, UNIX_EPOCH},
29};
30
31use gix::{
32 bstr::{BString, ByteSlice},
33 status::{UntrackedFiles, tree_index},
34};
35use rayon::prelude::*;
36use regex::Regex;
37use rusqlite::{OptionalExtension, params};
38use serde::Serialize;
39use sha2::{Digest, Sha256};
40use thiserror::Error;
41
42use crate::{
43 config::{Config, TargetKind},
44 index::{
45 ai::{LocalAiStatus, ModelInfo, ReconcilePlan, ReconcileReport},
46 anchors::{AnchorStatus, ChunkAnchor},
47 chunker::Chunk,
48 git_history::{
49 ChunkBlameSummary, CommitSearchHit, GitHistoryIndexStatus, PathHistoryItem,
50 QueryCommitHit, SymbolHistoryItem,
51 },
52 github::{GitHubEvidence, GitHubStatus, GitHubSyncReport, Papertrail},
53 symbols::Symbol,
54 },
55 language::Language,
56 query::graph_meta::{self, GraphMetaMode},
57 search::lexical::{SearchHit, SearchOptions},
58 storage::IndexConnection,
59 storage::StorageStatus,
60};
61
/// Handle to an index database: a storage connection plus the commit/worktree
/// scope selected through `set_context`.
#[derive(Debug)]
pub struct IndexDatabase {
    /// Underlying storage connection used for queries and batch statements.
    storage: IndexConnection,
    /// Commit SHA recorded by `set_context`; an empty string until it is called.
    pub active_commit_sha: String,
    /// Worktree id recorded by `set_context`; an empty string until it is called.
    pub active_worktree_id: String,
}
68
/// Progress events handed to the `FnMut(IndexProgress)` callbacks accepted by
/// the `*_with_progress` indexing entry points.
#[derive(Debug, Clone)]
pub enum IndexProgress {
    /// An indexing run began against `database` in the given `mode`.
    Started {
        database: PathBuf,
        mode: IndexMode,
    },
    /// File discovery is about to start.
    Discovering,
    /// Discovery finished; `files` is the number of files selected for indexing.
    Discovered {
        files: usize,
    },
    /// A file is being prepared.
    /// NOTE(review): not emitted in the visible part of this file; presumably
    /// reported by `prepare_files_with_progress` — confirm.
    PreparingFile {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    },
    /// Reported just before a prepared file is inserted; `current` is 1-based.
    IndexingFile {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    },
    /// Reported right before git-history preparation is spawned.
    IndexingGitHistory,
    /// Reported before `rebuild_logical_symbols` runs.
    RebuildingLogicalSymbols,
    /// Reported before `resolve_edges` runs.
    ResolvingGraph,
    /// Reported before `sync_fts` runs (incremental indexing).
    SyncingFts,
    /// Reported before `rebuild_fts` runs (full rebuild).
    RebuildingFts,
    /// Reported after the transaction is committed. `files` is the indexed file
    /// count; incremental runs add the number of deleted paths to it.
    Finished {
        files: usize,
    },
}
102
/// Which indexing strategy a run uses. Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum IndexMode {
    /// Incremental run driven by `git_changed_paths`.
    Changed,
    /// Incremental run driven by `discovery_plan`.
    Discover,
    /// Full rebuild, handled by `rebuild_with_progress`.
    Full,
}
110
111impl IndexMode {
112 pub fn label(self) -> &'static str {
113 match self {
114 Self::Changed => "changed files",
115 Self::Discover => "discovery",
116 Self::Full => "full rebuild",
117 }
118 }
119}
120
/// Snapshot of the index state, as assembled by `IndexDatabase::status`.
#[derive(Debug, Serialize)]
pub struct IndexStatus {
    /// Display form of the database path passed to `status`.
    pub database: String,
    /// Whether that database path exists on disk.
    pub exists: bool,
    /// Result of `schema::status` for the open connection.
    pub schema: schema::SchemaStatus,
    /// Value of the `git_commit` meta key, if present.
    pub git_commit: Option<String>,
    /// `true` when the `git_dirty` meta key holds the string `"true"`.
    pub git_dirty: Option<bool>,
    /// `indexed_at_ms` meta key parsed as `i64`; `None` if missing or unparsable.
    pub indexed_at_ms: Option<i64>,
    /// Value returned by `content_revision()`.
    pub content_revision: String,
    /// `fts_synced_at_ms` meta key parsed as `i64`; `None` if missing or unparsable.
    pub fts_synced_at_ms: Option<i64>,
    /// Value of the `fts_source_revision` meta key, if present.
    pub fts_source_revision: Option<String>,
    /// Value returned by `fts_dirty()`.
    pub fts_dirty: bool,
    /// `true` when FTS is not dirty and `fts_source_revision` equals `content_revision`.
    pub fts_fresh: bool,
    /// Row counts from `files`, grouped by language.
    pub file_count_by_language: BTreeMap<String, u64>,
    /// Value returned by `parser_failure_count()`.
    pub parser_failures: u64,
    /// Value returned by `parser_failure_paths()`.
    pub parser_failure_paths: Vec<ParserFailure>,
    /// Value returned by `git_history_status()`.
    pub git_history: GitHistoryIndexStatus,
    /// Value returned by `github_status()`.
    pub github: GitHubStatus,
    /// Value returned by `local_ai_status()`.
    pub local_ai: LocalAiStatus,
}
141
/// Serializable summary of an index heal pass.
///
/// NOTE(review): the code that fills this report is outside the visible part
/// of the file; the field meanings below are taken from their names only —
/// confirm against the producer.
#[derive(Debug, Serialize)]
pub struct HealIndexReport {
    // Presumably the number of files examined.
    pub checked_files: u64,
    // Presumably the number of files re-indexed.
    pub healed_files: u64,
    // Presumably the number of files dropped from the index.
    pub removed_files: u64,
    // Presumably the number of files left untouched.
    pub skipped_files: u64,
    // Presumably whether the FTS index was fresh after the pass.
    pub fts_fresh: bool,
    // Optional free-form message.
    pub message: Option<String>,
}
151
/// One recorded parser failure, as listed in `IndexStatus::parser_failure_paths`.
#[derive(Debug, Serialize)]
pub struct ParserFailure {
    /// Path of the file that failed to parse.
    pub path: String,
    /// Language of that file, as a string.
    pub language: String,
    /// Failure message.
    pub message: String,
}
158
/// Comparison between discovered files and the index, as built by
/// `IndexDatabase::discovery_status` from a `discovery_plan`.
#[derive(Debug, Serialize)]
pub struct DiscoveryStatus {
    /// `discovered_files` from the discovery plan.
    pub discovered_files: usize,
    /// `indexed_files` from the discovery plan.
    pub indexed_files: usize,
    /// Number of entries in the plan's `unindexed` list.
    pub unindexed_files: usize,
    /// Number of unindexed entries whose kind is `TargetKind::Source`.
    pub unindexed_source_files: usize,
    /// Number of entries in the plan's `changed` list.
    pub changed_indexed_files: usize,
    /// Number of entries in the plan's `deleted` set.
    pub removed_indexed_files: usize,
    /// Paths of up to the first 10 unindexed entries.
    pub unindexed_sample: Vec<String>,
    /// Set when at least one unindexed source file exists; suggests a re-index command.
    pub warning: Option<String>,
}
170
// Upper bound on files healed automatically in one call.
// NOTE(review): the use site is outside the visible part of the file; presumably
// this is the `cap` reported by `IndexError::NeedsReindex` — confirm.
const MAX_AUTO_HEAL_FILES_PER_CALL: usize = 4;
// Version tag for the graph index.
// NOTE(review): presumably compared by `ensure_graph_index_current` and written
// by `mark_graph_index_current`; neither is visible here — confirm.
const GRAPH_INDEX_VERSION: &str = "6";
173
/// Typed errors raised by index reads and healing; they are wrapped in
/// `anyhow::Error` via `anyhow::bail!` where this file raises them.
#[derive(Debug, Error)]
pub enum IndexError {
    /// The source file backing an indexed chunk could not be read, so the chunk
    /// is reported as gone.
    #[error("Gone: indexed chunk {chunk_id} no longer exists")]
    Gone { chunk_id: i64 },
    /// The chunk's anchor was stale and the chunk id was not found again after
    /// the file was healed.
    #[error("StaleChunk: chunk {chunk_id} in {path} could not be relocated after reindex")]
    StaleChunk { chunk_id: i64, path: String },
    /// More files are stale than the automatic heal cap allows.
    /// NOTE(review): the raise site is not visible in this part of the file.
    #[error("needs_reindex: {stale_files} stale files exceeds automatic heal cap {cap}")]
    NeedsReindex { stale_files: usize, cap: usize },
}
183
184impl IndexDatabase {
185 pub fn open(path: &Path) -> anyhow::Result<Self> {
186 Self::open_with_graph_check(path, true)
187 }
188
189 pub fn database_path(&self) -> &Path {
190 self.storage.database_path()
191 }
192
193 fn open_with_graph_check(path: &Path, check_graph: bool) -> anyhow::Result<Self> {
194 let mut storage = IndexConnection::open(path)?;
195 schema::check_compatible(storage.connection())?;
196 ai::ensure_model_manifest(storage.connection())?;
197 if let Some(root) = meta_for(storage.connection(), "source_root")? {
198 storage.set_source_root(PathBuf::from(root));
199 }
200 let db =
201 Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() };
202 if check_graph {
203 db.ensure_graph_index_current()?;
204 }
205 Ok(db)
206 }
207
208 pub fn open_config(config: &Config) -> anyhow::Result<Self> {
209 let mut db = Self::open_with_graph_check(&config.database, false)?;
210 db.storage.set_source_root(config.root.clone());
211 let (commit_sha, worktree_id) = resolve_git_context(&config.root);
212 db.set_context(&commit_sha, &worktree_id)?;
213 db.ensure_graph_index_current()?;
214 Ok(db)
215 }
216
217 pub fn migrate(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
218 Self::migrate_with_fastembed_cache(path, None)
219 }
220
221 fn migrate_with_fastembed_cache(
222 path: &Path,
223 fastembed_cache_dir: Option<&Path>,
224 ) -> anyhow::Result<schema::SchemaStatus> {
225 let storage = IndexConnection::open(path)?;
226 let status = schema::status(storage.connection())?;
227 match status.state {
228 schema::SchemaState::Newer | schema::SchemaState::Dirty => {
229 anyhow::bail!("{}", status.message);
230 },
231 schema::SchemaState::Compatible => {},
232 schema::SchemaState::Missing | schema::SchemaState::Older => {
233 schema::apply(storage.connection())?;
234 },
235 }
236 ai::ensure_model_manifest(storage.connection())?;
237 if let Some(fastembed_cache_dir) = fastembed_cache_dir {
238 ai::recover_cached_fastembed_model_from(storage.connection(), fastembed_cache_dir)?;
239 } else {
240 ai::recover_cached_fastembed_model(storage.connection())?;
241 }
242 schema::status(storage.connection())
243 }
244
245 pub fn migration_check(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
246 let storage = IndexConnection::open(path)?;
247 schema::status(storage.connection())
248 }
249
250 fn create_or_migrate(path: &Path) -> anyhow::Result<Self> {
251 let mut storage = IndexConnection::open(path)?;
252 schema::apply(storage.connection())?;
253 ai::ensure_model_manifest(storage.connection())?;
254 if let Some(root) = meta_for(storage.connection(), "source_root")? {
255 storage.set_source_root(PathBuf::from(root));
256 }
257 Ok(Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() })
258 }
259
260 pub fn set_context(&mut self, commit_sha: &str, worktree_id: &str) -> anyhow::Result<()> {
261 self.active_commit_sha = commit_sha.to_string();
262 self.active_worktree_id = worktree_id.to_string();
263
264 let conn = self.storage.connection();
265 conn.execute_batch(
266 "
267 CREATE TEMP TABLE IF NOT EXISTS connection_context(key TEXT PRIMARY KEY, value TEXT);
268 ",
269 )?;
270
271 let mut stmt = conn.prepare(
272 "INSERT OR REPLACE INTO temp.connection_context(key, value) VALUES (?1, ?2)",
273 )?;
274 stmt.execute(params!["commit_sha", commit_sha])?;
275 stmt.execute(params!["worktree_id", worktree_id])?;
276
277 conn.execute_batch("
278 DROP VIEW IF EXISTS temp.files;
279 CREATE TEMP VIEW temp.files AS
280 SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
281 FROM main.files
282 WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id') AND worktree_id != '' AND kind != 'deleted'
283 UNION ALL
284 SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
285 FROM main.files
286 WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
287 AND commit_sha != ''
288 AND path NOT IN (
289 SELECT path FROM main.files
290 WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
291 AND worktree_id != ''
292 );
293 ")?;
294
295 Ok(())
296 }
297
298 pub fn rebuild(config: &Config) -> anyhow::Result<Self> {
299 Self::rebuild_with_progress(config, |_| {})
300 }
301
    /// Rebuilds the index for the active commit/worktree scope from scratch,
    /// reporting each phase through `progress`.
    ///
    /// The database is created or migrated, scoped via `set_context`, and then
    /// everything from clearing the old rows to the final `indexed_at_ms` meta
    /// write happens inside a single transaction. On any error the transaction
    /// is rolled back and the error is returned.
    pub fn rebuild_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
    where
        F: FnMut(IndexProgress),
    {
        progress(IndexProgress::Started {
            database: config.database.clone(),
            mode: IndexMode::Full,
        });
        let mut db = Self::create_or_migrate(&config.database)?;
        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
        db.set_context(&commit_sha, &worktree_id)?;
        progress(IndexProgress::IndexingGitHistory);
        // Started before the transaction; the handle is consumed by
        // `apply_prepared_git_history` below, or joined on the error path.
        let mut git_history = Some(spawn_git_history_prepare(&config.root));
        let result = (|| -> anyhow::Result<()> {
            db.storage.execute_batch("BEGIN TRANSACTION")?;
            db.clear_full_rebuild_tables()?;
            db.set_meta("source_root", &config.root.display().to_string())?;
            db.storage.set_source_root(config.root.clone());
            db.write_git_meta(&config.root)?;
            let indexed = db.index_targets_with_progress(config, &mut progress)?;
            db.apply_prepared_git_history(
                &config.root,
                git_history
                    .take()
                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
            )?;
            progress(IndexProgress::RebuildingLogicalSymbols);
            db.rebuild_logical_symbols()?;
            progress(IndexProgress::ResolvingGraph);
            db.resolve_edges()?;
            db.mark_graph_index_current()?;
            progress(IndexProgress::RebuildingFts);
            db.rebuild_fts()?;
            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
            db.storage.execute_batch("COMMIT")?;
            // `Finished` is only reported once the commit has succeeded.
            progress(IndexProgress::Finished { files: indexed });
            Ok(())
        })();
        if result.is_err() {
            // If the failure happened before the handle was consumed, join it
            // here; its outcome is deliberately ignored.
            if let Some(handle) = git_history.take() {
                let _ = join_git_history_prepare(handle);
            }
            // Best-effort rollback; the original error is what gets returned.
            let _ = db.storage.execute_batch("ROLLBACK");
        }
        result?;
        Ok(db)
    }
349
    /// Deletes every row that belongs to the active scope ahead of a full rebuild.
    ///
    /// The affected file ids are gathered into `temp.full_rebuild_file_ids`:
    /// all files of the active worktree, plus files of the active commit whose
    /// path has no row in the active worktree scope. Edges that point at symbols
    /// of those files are reset to `unresolved`; edges that originate from those
    /// files are deleted. Dependent rows (logical symbol members, orphaned
    /// logical symbols, symbol facts, FTS rows, summaries, embeddings, blame,
    /// docs, parser failures) are removed before the symbols, chunks and files
    /// themselves, and the temp table is emptied at the end.
    ///
    /// Relies on `temp.connection_context`, which `set_context` creates.
    fn clear_full_rebuild_tables(&self) -> anyhow::Result<()> {
        self.storage.execute_batch(
            "
            CREATE TEMP TABLE IF NOT EXISTS full_rebuild_file_ids(id INTEGER PRIMARY KEY);
            DELETE FROM temp.full_rebuild_file_ids;
            INSERT OR IGNORE INTO temp.full_rebuild_file_ids(id)
            SELECT id
            FROM main.files
            WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
              AND worktree_id != '';
            INSERT OR IGNORE INTO temp.full_rebuild_file_ids(id)
            SELECT id
            FROM main.files
            WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
              AND commit_sha != ''
              AND path NOT IN (
                  SELECT path FROM main.files
                  WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
                    AND worktree_id != ''
              );

            UPDATE main.edges
            SET to_symbol_id = NULL,
                target_start_line = NULL,
                target_end_line = NULL,
                resolution = 'unresolved'
            WHERE to_symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.edges
            WHERE source_file_id IN (SELECT id FROM temp.full_rebuild_file_ids)
               OR from_symbol_id IN (
                    SELECT symbols.id
                    FROM main.symbols
                    JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
               );

            DELETE FROM main.logical_symbol_members
            WHERE symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.logical_symbols
            WHERE id NOT IN (
                SELECT logical_symbol_id FROM main.logical_symbol_members
            );
            DELETE FROM main.symbol_facts
            WHERE symbol_id IN (
                SELECT symbols.id
                FROM main.symbols
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
            );
            DELETE FROM main.chunk_fts
            WHERE rowid IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.chunk_summaries
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.chunk_embeddings
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.git_chunk_blame
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.docs
            WHERE chunk_id IN (
                SELECT chunks.id
                FROM main.chunks
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
            );
            DELETE FROM main.parser_failures
            WHERE path IN (
                SELECT path
                FROM main.files
                JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = files.id
            );
            DELETE FROM main.symbols
            WHERE file_id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM main.chunks
            WHERE file_id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM main.files
            WHERE id IN (SELECT id FROM temp.full_rebuild_file_ids);
            DELETE FROM temp.full_rebuild_file_ids;
            ",
        )?;
        Ok(())
    }
452
453 pub fn index_changed(config: &Config) -> anyhow::Result<Self> {
454 Self::index_changed_with_progress(config, |_| {})
455 }
456
457 pub fn index_changed_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
458 where
459 F: FnMut(IndexProgress),
460 {
461 Self::index_incremental_with_progress(config, IndexMode::Changed, &mut progress)
462 }
463
464 pub fn index_discover(config: &Config) -> anyhow::Result<Self> {
465 Self::index_discover_with_progress(config, |_| {})
466 }
467
468 pub fn index_discover_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
469 where
470 F: FnMut(IndexProgress),
471 {
472 Self::index_incremental_with_progress(config, IndexMode::Discover, &mut progress)
473 }
474
    /// Shared implementation of the `Changed` and `Discover` incremental modes.
    ///
    /// Falls back to a full rebuild when the database file does not exist, when
    /// its schema state is `Missing`, or when the active scope has no indexed
    /// files. Otherwise the incremental plan for `mode` is applied inside one
    /// transaction; the logical-symbol, graph and FTS passes are skipped when
    /// nothing was indexed or deleted. On any error the transaction is rolled
    /// back and the error is returned.
    fn index_incremental_with_progress<F>(
        config: &Config,
        mode: IndexMode,
        progress: &mut F,
    ) -> anyhow::Result<Self>
    where
        F: FnMut(IndexProgress),
    {
        if !config.database.exists() {
            return Self::rebuild_with_progress(config, progress);
        }
        if Self::migration_check(&config.database)?.state == schema::SchemaState::Missing {
            return Self::rebuild_with_progress(config, progress);
        }

        let mut db = Self::open(&config.database)?;
        let (commit_sha, worktree_id) = resolve_git_context(&config.root);
        db.set_context(&commit_sha, &worktree_id)?;
        // Nothing indexed for this scope yet: an incremental run has no baseline.
        if db.indexed_file_count()? == 0 {
            return Self::rebuild_with_progress(config, progress);
        }
        progress(IndexProgress::Started { database: config.database.clone(), mode });
        progress(IndexProgress::IndexingGitHistory);
        // Started before the transaction; the handle is consumed by
        // `apply_prepared_git_history` below, or joined on the error path.
        let mut git_history = Some(spawn_git_history_prepare(&config.root));
        let result = (|| -> anyhow::Result<()> {
            db.storage.execute_batch("BEGIN TRANSACTION")?;
            db.set_meta("source_root", &config.root.display().to_string())?;
            db.storage.set_source_root(config.root.clone());
            db.write_git_meta(&config.root)?;
            let indexed = match mode {
                IndexMode::Changed => db.index_changed_files_with_progress(config, progress)?,
                IndexMode::Discover => db.index_discovered_files_with_progress(config, progress)?,
                IndexMode::Full => unreachable!("full mode is handled by rebuild_with_progress"),
            };
            db.apply_prepared_git_history(
                &config.root,
                git_history
                    .take()
                    .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
            )?;
            // `indexed` counts indexed files plus deleted paths; the derived
            // structures are only refreshed when it is non-zero.
            if indexed > 0 {
                progress(IndexProgress::RebuildingLogicalSymbols);
                db.rebuild_logical_symbols()?;
                progress(IndexProgress::ResolvingGraph);
                db.resolve_edges()?;
                db.mark_graph_index_current()?;
                progress(IndexProgress::SyncingFts);
                db.sync_fts()?;
            }
            db.set_meta("indexed_at_ms", &now_ms().to_string())?;
            db.storage.execute_batch("COMMIT")?;
            // `Finished` is only reported once the commit has succeeded.
            progress(IndexProgress::Finished { files: indexed });
            Ok(())
        })();
        if result.is_err() {
            // If the failure happened before the handle was consumed, join it
            // here; its outcome is deliberately ignored.
            if let Some(handle) = git_history.take() {
                let _ = join_git_history_prepare(handle);
            }
            // Best-effort rollback; the original error is what gets returned.
            let _ = db.storage.execute_batch("ROLLBACK");
        }
        result?;
        Ok(db)
    }
538
539 pub fn index_targets(&self, config: &Config) -> anyhow::Result<()> {
540 self.index_targets_with_progress(config, &mut |_| {})?;
541 Ok(())
542 }
543
544 fn index_targets_with_progress<F>(
545 &self,
546 config: &Config,
547 progress: &mut F,
548 ) -> anyhow::Result<usize>
549 where
550 F: FnMut(IndexProgress),
551 {
552 progress(IndexProgress::Discovering);
553 let files = collect_index_files(config)?;
554 let changes = git_changed_paths(&config.root).unwrap_or_default();
555 let files = self.assign_file_scopes(files, &changes);
556 progress(IndexProgress::Discovered { files: files.len() });
557
558 let prepared = prepare_files_with_progress(&files, progress)?;
559 for (index, prepared_file) in prepared.iter().enumerate() {
560 let current = index + 1;
561 if should_report_file_progress(current, files.len()) {
562 progress(IndexProgress::IndexingFile {
563 current,
564 total: files.len(),
565 path: prepared_file.file.relative_path.clone(),
566 language: prepared_file.file.language,
567 kind: prepared_file.file.kind,
568 });
569 }
570 self.insert_prepared_file(prepared_file)?;
571 }
572
573 Ok(files.len())
574 }
575
576 fn index_changed_files_with_progress<F>(
577 &self,
578 config: &Config,
579 progress: &mut F,
580 ) -> anyhow::Result<usize>
581 where
582 F: FnMut(IndexProgress),
583 {
584 progress(IndexProgress::Discovering);
585 let changes = git_changed_paths(&config.root)?;
586 let files = collect_changed_index_files(config, &changes)?;
587 let files = self.assign_file_scopes(files, &changes);
588 self.apply_incremental_file_plan(files, changes.deleted, progress)
589 }
590
591 fn index_discovered_files_with_progress<F>(
592 &self,
593 config: &Config,
594 progress: &mut F,
595 ) -> anyhow::Result<usize>
596 where
597 F: FnMut(IndexProgress),
598 {
599 progress(IndexProgress::Discovering);
600 let plan = discovery_plan(self.storage.connection(), config)?;
601 let changes = git_changed_paths(&config.root).unwrap_or_default();
602 let files = self.assign_file_scopes(plan.files, &changes);
603 self.apply_incremental_file_plan(files, plan.deleted, progress)
604 }
605
606 fn assign_file_scopes(
607 &self,
608 files: Vec<IndexFile>,
609 changes: &GitChangedPaths,
610 ) -> Vec<IndexFile> {
611 let has_base_commit = !self.active_commit_sha.is_empty();
612 files
613 .into_iter()
614 .map(|mut file| {
615 if !has_base_commit || changes.changed.contains(&file.relative_path) {
616 file.commit_sha.clear();
617 file.worktree_id.clone_from(&self.active_worktree_id);
618 } else {
619 file.commit_sha.clone_from(&self.active_commit_sha);
620 file.worktree_id.clear();
621 }
622 file
623 })
624 .collect()
625 }
626
627 fn apply_incremental_file_plan<F>(
628 &self,
629 files: Vec<IndexFile>,
630 deleted: BTreeSet<PathBuf>,
631 progress: &mut F,
632 ) -> anyhow::Result<usize>
633 where
634 F: FnMut(IndexProgress),
635 {
636 progress(IndexProgress::Discovered { files: files.len() });
637
638 let deleted_count = deleted.len();
639 for path in deleted {
640 self.mark_file_deleted(&path)?;
641 }
642
643 let prepared = prepare_files_with_progress(&files, progress)?;
644 for (index, prepared_file) in prepared.iter().enumerate() {
645 let current = index + 1;
646 if should_report_file_progress(current, files.len()) {
647 progress(IndexProgress::IndexingFile {
648 current,
649 total: files.len(),
650 path: prepared_file.file.relative_path.clone(),
651 language: prepared_file.file.language,
652 kind: prepared_file.file.kind,
653 });
654 }
655 self.remove_file_in_scope(
656 &prepared_file.file.relative_path,
657 &prepared_file.file.commit_sha,
658 &prepared_file.file.worktree_id,
659 )?;
660 self.insert_prepared_file(prepared_file)?;
661 }
662
663 Ok(files.len() + deleted_count)
664 }
665
666 pub fn status(&self, database: &Path) -> anyhow::Result<IndexStatus> {
667 let mut counts = BTreeMap::new();
668 let mut stmt = self
669 .storage
670 .connection()
671 .prepare("SELECT language, COUNT(*) FROM files GROUP BY language ORDER BY language")?;
672 let rows =
673 stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)))?;
674 for row in rows {
675 let (language, count) = row?;
676 counts.insert(language, u64::try_from(count).unwrap_or(0));
677 }
678
679 let content_revision = self.content_revision()?;
680 let fts_source_revision = self.meta("fts_source_revision")?;
681 let fts_dirty = self.fts_dirty()?;
682
683 Ok(IndexStatus {
684 database: database.display().to_string(),
685 exists: database.exists(),
686 schema: schema::status(self.storage.connection())?,
687 git_commit: self.meta("git_commit")?,
688 git_dirty: self.meta("git_dirty")?.map(|value| value == "true"),
689 indexed_at_ms: self.meta("indexed_at_ms")?.and_then(|value| value.parse::<i64>().ok()),
690 content_revision: content_revision.clone(),
691 fts_synced_at_ms: self
692 .meta("fts_synced_at_ms")?
693 .and_then(|value| value.parse::<i64>().ok()),
694 fts_dirty,
695 fts_fresh: !fts_dirty
696 && fts_source_revision.as_deref() == Some(content_revision.as_str()),
697 fts_source_revision,
698 file_count_by_language: counts,
699 parser_failures: self.parser_failure_count()?,
700 parser_failure_paths: self.parser_failure_paths()?,
701 git_history: self.git_history_status()?,
702 github: self.github_status()?,
703 local_ai: self.local_ai_status()?,
704 })
705 }
706
707 pub fn storage_status(&self) -> anyhow::Result<StorageStatus> {
708 self.storage.status()
709 }
710
711 pub fn discovery_status(&self, config: &Config) -> anyhow::Result<DiscoveryStatus> {
712 let plan = discovery_plan(self.storage.connection(), config)?;
713 let unindexed_source_files =
714 plan.unindexed.iter().filter(|file| file.kind == TargetKind::Source).count();
715 let unindexed_sample =
716 plan.unindexed.iter().take(10).map(|file| path_string(&file.relative_path)).collect();
717 let warning = (unindexed_source_files > 0).then(|| {
718 format!(
719 "{unindexed_source_files} unindexed source files detected. Run `rag-rat index --full` or `rag-rat index --discover`."
720 )
721 });
722 Ok(DiscoveryStatus {
723 discovered_files: plan.discovered_files,
724 indexed_files: plan.indexed_files,
725 unindexed_files: plan.unindexed.len(),
726 unindexed_source_files,
727 changed_indexed_files: plan.changed.len(),
728 removed_indexed_files: plan.deleted.len(),
729 unindexed_sample,
730 warning,
731 })
732 }
733
734 pub fn search(
735 &self,
736 query: &str,
737 limit: u32,
738 include_generated: bool,
739 ) -> anyhow::Result<Vec<SearchHit>> {
740 self.search_with_graph_meta(query, limit, include_generated, GraphMetaMode::Compact, 3)
741 }
742
743 pub fn search_explain(
744 &self,
745 query: &str,
746 limit: u32,
747 include_generated: bool,
748 ) -> anyhow::Result<Vec<SearchHit>> {
749 self.search_explain_with_graph_meta(
750 query,
751 limit,
752 include_generated,
753 GraphMetaMode::Compact,
754 3,
755 )
756 }
757
758 pub fn search_with_graph_meta(
759 &self,
760 query: &str,
761 limit: u32,
762 include_generated: bool,
763 graph_mode: GraphMetaMode,
764 graph_limit: u32,
765 ) -> anyhow::Result<Vec<SearchHit>> {
766 self.search_with_graph_meta_options(
767 query,
768 limit,
769 include_generated,
770 graph_mode,
771 graph_limit,
772 SearchOptions::default(),
773 )
774 }
775
776 pub fn search_with_graph_meta_options(
777 &self,
778 query: &str,
779 limit: u32,
780 include_generated: bool,
781 graph_mode: GraphMetaMode,
782 graph_limit: u32,
783 options: SearchOptions,
784 ) -> anyhow::Result<Vec<SearchHit>> {
785 self.ensure_fts_fresh()?;
786 let mut hits =
787 self.search_with_heal(query, limit, include_generated, true, false, options)?;
788 graph_meta::attach_to_search_hits(
789 self.storage.connection(),
790 &mut hits,
791 graph_mode,
792 graph_limit,
793 )?;
794 Ok(hits)
795 }
796
797 pub fn search_explain_with_graph_meta(
798 &self,
799 query: &str,
800 limit: u32,
801 include_generated: bool,
802 graph_mode: GraphMetaMode,
803 graph_limit: u32,
804 ) -> anyhow::Result<Vec<SearchHit>> {
805 self.search_explain_with_graph_meta_options(
806 query,
807 limit,
808 include_generated,
809 graph_mode,
810 graph_limit,
811 SearchOptions::default(),
812 )
813 }
814
815 pub fn search_explain_with_graph_meta_options(
816 &self,
817 query: &str,
818 limit: u32,
819 include_generated: bool,
820 graph_mode: GraphMetaMode,
821 graph_limit: u32,
822 options: SearchOptions,
823 ) -> anyhow::Result<Vec<SearchHit>> {
824 self.ensure_fts_fresh()?;
825 let mut hits =
826 self.search_with_heal(query, limit, include_generated, true, true, options)?;
827 graph_meta::attach_to_search_hits(
828 self.storage.connection(),
829 &mut hits,
830 graph_mode,
831 graph_limit,
832 )?;
833 Ok(hits)
834 }
835
836 pub fn symbols(
837 &self,
838 name: &str,
839 language: Option<Language>,
840 limit: u32,
841 ) -> anyhow::Result<Vec<crate::query::symbol::SymbolHit>> {
842 crate::query::symbol::lookup(self.storage.connection(), name, language, limit)
843 }
844
845 pub fn symbol_candidates(
846 &self,
847 selector: &crate::query::symbol::SymbolSelector,
848 ) -> anyhow::Result<crate::query::symbol::SymbolLookup> {
849 crate::query::symbol::lookup_candidates(self.storage.connection(), selector)
850 }
851
852 pub fn select_symbol(
853 &self,
854 selector: &crate::query::symbol::SymbolSelector,
855 ) -> anyhow::Result<
856 Result<Option<crate::query::symbol::SymbolHit>, crate::query::symbol::SymbolDisambiguation>,
857 > {
858 crate::query::symbol::select_one(self.storage.connection(), selector)
859 }
860
861 pub fn read_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
862 self.read_chunk_with_graph_and_memories(chunk_id, GraphMetaMode::Full, 20, true)
863 }
864
865 pub fn read_chunk_with_graph(
866 &self,
867 chunk_id: i64,
868 graph_mode: GraphMetaMode,
869 graph_limit: u32,
870 ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
871 self.read_chunk_with_graph_and_memories(chunk_id, graph_mode, graph_limit, false)
872 }
873
874 pub fn read_chunk_with_graph_and_memories(
875 &self,
876 chunk_id: i64,
877 graph_mode: GraphMetaMode,
878 graph_limit: u32,
879 include_memories: bool,
880 ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
881 let Some(mut chunk) = self.read_chunk_current(chunk_id)? else {
882 return Ok(None);
883 };
884 graph_meta::attach_to_read_chunk(
885 self.storage.connection(),
886 &mut chunk,
887 graph_mode,
888 graph_limit,
889 )?;
890 if include_memories {
891 chunk.memories =
892 crate::query::memory::memories_for_chunk(self.storage.connection(), chunk_id, 20)?;
893 }
894 Ok(Some(chunk))
895 }
896
897 fn read_chunk_current(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
898 let Some(mut chunk) = crate::query::read_chunk(self.storage.connection(), chunk_id)? else {
899 return Ok(None);
900 };
901 let Some(root) = self.storage.source_root() else {
902 return Ok(Some(chunk));
903 };
904 let source_path = root.join(&chunk.path);
905 let current_text = match fs::read_to_string(&source_path) {
906 Ok(text) => text,
907 Err(_) => {
908 let path = chunk.path.clone();
909 self.mark_file_deleted(Path::new(&path))?;
910 self.sync_fts()?;
911 anyhow::bail!(IndexError::Gone { chunk_id });
912 },
913 };
914 let anchor = self.chunk_anchor(chunk_id)?;
915 let status = anchors::validate(
916 &chunk.text,
917 usize::try_from(chunk.start_line).unwrap_or(1),
918 usize::try_from(chunk.end_line).unwrap_or(1),
919 &anchor,
920 ¤t_text,
921 );
922 match status {
923 AnchorStatus::Exact => {
924 if let Some(text) = anchors::slice_lines(
925 ¤t_text,
926 usize::try_from(chunk.start_line).unwrap_or(1),
927 usize::try_from(chunk.end_line).unwrap_or(1),
928 ) {
929 chunk.text = text;
930 }
931 Ok(Some(chunk))
932 },
933 AnchorStatus::Relocated { start_line, end_line, text } => {
934 chunk.start_line = i64::try_from(start_line)?;
935 chunk.end_line = i64::try_from(end_line)?;
936 chunk.text = text;
937 Ok(Some(chunk))
938 },
939 AnchorStatus::Stale => {
940 self.heal_file(Path::new(&chunk.path))?;
941 self.sync_fts()?;
942 let healed = crate::query::read_chunk(self.storage.connection(), chunk_id)?;
943 match healed {
944 Some(chunk) => Ok(Some(chunk)),
945 None => anyhow::bail!(IndexError::StaleChunk { chunk_id, path: chunk.path }),
946 }
947 },
948 }
949 }
950
951 pub fn search_hash_baseline(
952 &self,
953 query: &str,
954 limit: u32,
955 include_generated: bool,
956 ) -> anyhow::Result<Vec<SearchHit>> {
957 self.ensure_fts_fresh()?;
958 crate::search::lexical::search_hash_baseline(
959 self.storage.connection(),
960 query,
961 limit,
962 include_generated,
963 )
964 }
965
966 pub fn docs_for_symbol(&self, symbol: &str, limit: u32) -> anyhow::Result<Vec<SearchHit>> {
967 self.search(symbol, limit, true)
968 }
969
970 pub fn docs_for_selected_symbol(
971 &self,
972 symbol: &crate::query::symbol::SymbolHit,
973 limit: u32,
974 ) -> anyhow::Result<Vec<SearchHit>> {
975 let mut hits = self.local_symbol_context_hits(symbol, limit)?;
976 hits.extend(self.search(&symbol.name, limit.saturating_mul(4).max(limit), true)?);
977 rank_docs_for_symbol(symbol, &mut hits);
978 dedupe_search_hits(&mut hits);
979 hits.truncate(usize::try_from(limit).unwrap_or(usize::MAX));
980 Ok(hits)
981 }
982
983 pub fn commit_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<CommitSearchHit>> {
984 git_history::commit_search(self.storage.connection(), query, limit)
985 }
986
987 pub fn git_history_for_path(
988 &self,
989 path: &str,
990 limit: u32,
991 ) -> anyhow::Result<Vec<PathHistoryItem>> {
992 git_history::history_for_path(self.storage.connection(), path, limit)
993 }
994
995 pub fn git_history_for_symbol(
996 &self,
997 symbol: &str,
998 language: Option<Language>,
999 limit: u32,
1000 ) -> anyhow::Result<Vec<SymbolHistoryItem>> {
1001 let symbols = self.symbols(symbol, language, limit)?;
1002 let per_symbol_limit = limit.max(1);
1003 let mut out = Vec::new();
1004 for symbol_hit in symbols {
1005 for commit in self.git_history_for_path(&symbol_hit.path, per_symbol_limit)? {
1006 out.push(SymbolHistoryItem {
1007 symbol: symbol_hit.name.clone(),
1008 qualified_name: symbol_hit.qualified_name.clone(),
1009 path: symbol_hit.path.clone(),
1010 start_byte: symbol_hit.start_byte,
1011 end_byte: symbol_hit.end_byte,
1012 commit,
1013 evidence_kind: "historical",
1014 });
1015 if out.len() >= usize::try_from(limit).unwrap_or(usize::MAX) {
1016 return Ok(out);
1017 }
1018 }
1019 }
1020 Ok(out)
1021 }
1022
1023 pub fn commits_touching_query(
1024 &self,
1025 query: &str,
1026 limit: u32,
1027 ) -> anyhow::Result<Vec<QueryCommitHit>> {
1028 let current_hits = self.search(query, limit, true)?;
1029 git_history::commits_touching_query(self.storage.connection(), query, limit, ¤t_hits)
1030 }
1031
1032 pub fn git_blame_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<ChunkBlameSummary>> {
1033 let Some(chunk) = self.read_chunk(chunk_id)? else {
1034 return Ok(None);
1035 };
1036 let source_text_hash = git_history::source_text_hash(&chunk.text);
1037 if let Some(cached) =
1038 git_history::cached_blame(self.storage.connection(), chunk_id, &source_text_hash)?
1039 {
1040 return Ok(Some(cached));
1041 }
1042 let Some(root) = self.storage.source_root() else {
1043 return Ok(Some(ChunkBlameSummary {
1044 chunk_id,
1045 path: chunk.path,
1046 start_line: chunk.start_line,
1047 end_line: chunk.end_line,
1048 source_text_hash,
1049 line_count: 0,
1050 dominant_commit: None,
1051 dominant_commit_lines: 0,
1052 newest_commit: None,
1053 newest_commit_time_s: None,
1054 oldest_commit: None,
1055 oldest_commit_time_s: None,
1056 commit_counts: BTreeMap::new(),
1057 evidence_kind: "historical",
1058 }));
1059 };
1060 let blame_lines =
1061 git_history::blame_lines(root, &chunk.path, chunk.start_line, chunk.end_line);
1062 let mut counts = BTreeMap::<String, i64>::new();
1063 let mut newest = None::<(String, i64)>;
1064 let mut oldest = None::<(String, i64)>;
1065 for line in &blame_lines {
1066 *counts.entry(line.commit.clone()).or_default() += 1;
1067 if let Some(time) = line.author_time_s {
1068 if newest.as_ref().is_none_or(|(_, newest_time)| time > *newest_time) {
1069 newest = Some((line.commit.clone(), time));
1070 }
1071 if oldest.as_ref().is_none_or(|(_, oldest_time)| time < *oldest_time) {
1072 oldest = Some((line.commit.clone(), time));
1073 }
1074 }
1075 }
1076 let dominant = counts
1077 .iter()
1078 .max_by_key(|(commit, count)| (*count, *commit))
1079 .map(|(commit, count)| (commit.clone(), *count));
1080 let summary = ChunkBlameSummary {
1081 chunk_id,
1082 path: chunk.path,
1083 start_line: chunk.start_line,
1084 end_line: chunk.end_line,
1085 source_text_hash,
1086 line_count: i64::try_from(blame_lines.len()).unwrap_or(i64::MAX),
1087 dominant_commit: dominant.as_ref().map(|(commit, _)| commit.clone()),
1088 dominant_commit_lines: dominant.map(|(_, count)| count).unwrap_or(0),
1089 newest_commit: newest.as_ref().map(|(commit, _)| commit.clone()),
1090 newest_commit_time_s: newest.as_ref().map(|(_, time)| *time),
1091 oldest_commit: oldest.as_ref().map(|(commit, _)| commit.clone()),
1092 oldest_commit_time_s: oldest.as_ref().map(|(_, time)| *time),
1093 commit_counts: counts,
1094 evidence_kind: "historical",
1095 };
1096 git_history::store_blame(self.storage.connection(), &summary)?;
1097 Ok(Some(summary))
1098 }
1099
1100 pub fn github_sync_from_refs(&self, offline: bool) -> anyhow::Result<GitHubSyncReport> {
1101 self.github_sync_from_refs_with_progress(offline, |_| {})
1102 }
1103
1104 pub fn github_sync_from_refs_with_progress(
1105 &self,
1106 offline: bool,
1107 progress: impl FnMut(github::GitHubSyncProgress),
1108 ) -> anyhow::Result<GitHubSyncReport> {
1109 let Some(root) = self.storage.source_root() else {
1110 anyhow::bail!("index has no source_root metadata; rebuild required");
1111 };
1112 if offline {
1113 github::sync_from_refs::<github::GhCliGitHubClient>(
1114 self.storage.connection(),
1115 root,
1116 None,
1117 true,
1118 )
1119 } else {
1120 let client = github::GhCliGitHubClient;
1121 github::sync_from_refs_with_progress(
1122 self.storage.connection(),
1123 root,
1124 Some(&client),
1125 false,
1126 progress,
1127 )
1128 }
1129 }
1130
1131 pub fn github_sync_issue(
1132 &self,
1133 issue_ref: &str,
1134 offline: bool,
1135 ) -> anyhow::Result<GitHubSyncReport> {
1136 if offline {
1137 github::sync_issue::<github::GhCliGitHubClient>(
1138 self.storage.connection(),
1139 issue_ref,
1140 None,
1141 true,
1142 )
1143 } else {
1144 let client = github::GhCliGitHubClient;
1145 github::sync_issue(self.storage.connection(), issue_ref, Some(&client), false)
1146 }
1147 }
1148
1149 pub fn github_issue_search(
1150 &self,
1151 query: &str,
1152 limit: u32,
1153 ) -> anyhow::Result<Vec<GitHubEvidence>> {
1154 github::issue_search(self.storage.connection(), query, limit)
1155 }
1156
1157 pub fn rationale_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<GitHubEvidence>> {
1158 github::rationale_search(self.storage.connection(), query, limit)
1159 }
1160
1161 pub fn github_refs_for_path(
1162 &self,
1163 path: &str,
1164 limit: u32,
1165 ) -> anyhow::Result<Vec<github::GitHubRef>> {
1166 github::refs_for_path(self.storage.connection(), path, limit)
1167 }
1168
1169 pub fn github_sync_status(&self) -> anyhow::Result<GitHubStatus> {
1170 self.github_status()
1171 }
1172
1173 pub fn papertrail_for_chunk(
1174 &self,
1175 chunk_id: i64,
1176 limit: u32,
1177 ) -> anyhow::Result<Option<Papertrail>> {
1178 let Some(chunk) = self.read_chunk(chunk_id)? else {
1179 return Ok(None);
1180 };
1181 Ok(Some(github::papertrail_for_chunk(self.storage.connection(), &chunk, limit)?))
1182 }
1183
1184 pub fn papertrail_for_symbol(
1185 &self,
1186 symbol: &str,
1187 language: Option<Language>,
1188 limit: u32,
1189 ) -> anyhow::Result<Option<Papertrail>> {
1190 let Some(symbol) = self.symbols(symbol, language, limit)?.into_iter().next() else {
1191 return Ok(None);
1192 };
1193 Ok(Some(github::papertrail_for_symbol(self.storage.connection(), &symbol, limit)?))
1194 }
1195
1196 pub fn papertrail_for_selected_symbol(
1197 &self,
1198 symbol: &crate::query::symbol::SymbolHit,
1199 limit: u32,
1200 ) -> anyhow::Result<Papertrail> {
1201 github::papertrail_for_symbol(self.storage.connection(), symbol, limit)
1202 }
1203
1204 pub fn papertrail_for_commit(
1205 &self,
1206 commit_hash: &str,
1207 limit: u32,
1208 ) -> anyhow::Result<Papertrail> {
1209 github::papertrail_for_commit(self.storage.connection(), commit_hash, limit)
1210 }
1211
1212 pub fn local_ai_status(&self) -> anyhow::Result<LocalAiStatus> {
1213 ai::status(self.storage.connection())
1214 }
1215
1216 pub fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
1217 ai::models(self.storage.connection())
1218 }
1219
1220 pub fn install_model(&self, model_id: &str) -> anyhow::Result<ModelInfo> {
1221 ai::install_model(self.storage.connection(), model_id)
1222 }
1223
1224 pub fn reconcile(
1225 &self,
1226 limit: Option<u32>,
1227 batch_size: Option<u32>,
1228 ) -> anyhow::Result<ReconcileReport> {
1229 ai::reconcile(self.storage.connection(), limit, batch_size)
1230 }
1231
1232 pub fn reconcile_plan(&self) -> anyhow::Result<ReconcilePlan> {
1233 ai::reconcile_plan(self.storage.connection())
1234 }
1235
1236 pub fn reconcile_with_progress(
1237 &self,
1238 limit: Option<u32>,
1239 batch_size: Option<u32>,
1240 force: bool,
1241 progress: impl FnMut(ai::ReconcileProgress),
1242 ) -> anyhow::Result<ReconcileReport> {
1243 ai::reconcile_with_progress(self.storage.connection(), limit, batch_size, force, progress)
1244 }
1245
1246 pub fn reconcile_with_options_progress(
1247 &self,
1248 options: ai::ReconcileOptions,
1249 progress: impl FnMut(ai::ReconcileProgress),
1250 ) -> anyhow::Result<ReconcileReport> {
1251 ai::reconcile_with_options_progress(self.storage.connection(), options, progress)
1252 }
1253
1254 pub fn current_embedding_count(&self, model_id: &str) -> anyhow::Result<u64> {
1255 ai::current_embedding_count(self.storage.connection(), model_id)
1256 }
1257
1258 pub fn heal_index(&self, limit: Option<u32>) -> anyhow::Result<HealIndexReport> {
1259 let Some(root) = self.storage.source_root() else {
1260 anyhow::bail!("heal_index requires source_root metadata; run `rag-rat index` first");
1261 };
1262 let indexed_files = self.indexed_files()?;
1263 let max_repairs = limit.map(usize::try_from).transpose()?.unwrap_or(usize::MAX);
1264 let mut report = HealIndexReport {
1265 checked_files: 0,
1266 healed_files: 0,
1267 removed_files: 0,
1268 skipped_files: 0,
1269 fts_fresh: false,
1270 message: None,
1271 };
1272
1273 for file in indexed_files {
1274 report.checked_files += 1;
1275 let path = Path::new(&file.path);
1276 let full_path = root.join(path);
1277 let Ok(text) = fs::read_to_string(&full_path) else {
1278 if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1279 >= max_repairs
1280 {
1281 report.message =
1282 Some("limit reached; rerun heal_index to continue".to_string());
1283 break;
1284 }
1285 self.mark_file_deleted(path)?;
1286 report.removed_files += 1;
1287 continue;
1288 };
1289 let sha256 = hex_sha256(text.as_bytes());
1290 if sha256 == file.sha256 {
1291 report.skipped_files += 1;
1292 continue;
1293 }
1294 if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1295 >= max_repairs
1296 {
1297 report.message = Some("limit reached; rerun heal_index to continue".to_string());
1298 break;
1299 }
1300 self.heal_file(path)?;
1301 report.healed_files += 1;
1302 }
1303
1304 if report.healed_files > 0 || report.removed_files > 0 {
1305 self.sync_fts()?;
1306 } else {
1307 self.ensure_fts_fresh()?;
1308 }
1309 report.fts_fresh = !self.fts_dirty()?;
1310 Ok(report)
1311 }
1312
1313 pub fn ffi_surface(&self, limit: u32) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1314 crate::query::impact::ffi_surface(self.storage.connection(), limit)
1315 }
1316
1317 pub fn find_callers(
1318 &self,
1319 symbol: &str,
1320 limit: u32,
1321 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1322 crate::query::graph::traverse(self.storage.connection(), symbol, true, limit)
1323 }
1324
1325 pub fn find_callers_with_options(
1326 &self,
1327 symbol: &str,
1328 limit: u32,
1329 options: &crate::query::graph::GraphTraversalOptions,
1330 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1331 let options = self.graph_options_with_logical_group(options)?;
1332 crate::query::graph::traverse_with_options(
1333 self.storage.connection(),
1334 symbol,
1335 true,
1336 limit,
1337 &options,
1338 )
1339 }
1340
1341 pub fn trace_callees(
1342 &self,
1343 symbol: &str,
1344 limit: u32,
1345 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1346 crate::query::graph::traverse(self.storage.connection(), symbol, false, limit)
1347 }
1348
1349 pub fn trace_callees_with_options(
1350 &self,
1351 symbol: &str,
1352 limit: u32,
1353 options: &crate::query::graph::GraphTraversalOptions,
1354 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1355 let options = self.graph_options_with_logical_group(options)?;
1356 crate::query::graph::traverse_with_options(
1357 self.storage.connection(),
1358 symbol,
1359 false,
1360 limit,
1361 &options,
1362 )
1363 }
1364
1365 pub fn graph_traversal_report(
1366 &self,
1367 tool: &str,
1368 symbol: &crate::query::symbol::SymbolHit,
1369 reverse: bool,
1370 limit: u32,
1371 options: &crate::query::graph::GraphTraversalOptions,
1372 ) -> anyhow::Result<crate::query::graph::GraphTraversalReport> {
1373 let options = self.graph_options_with_logical_group(options)?;
1374 let results = crate::query::graph::traverse_with_options(
1375 self.storage.connection(),
1376 &symbol.qualified_name,
1377 reverse,
1378 limit,
1379 &options,
1380 )?;
1381 let summary = crate::query::graph::traversal_summary(
1382 self.storage.connection(),
1383 &symbol.qualified_name,
1384 reverse,
1385 limit,
1386 &options,
1387 results.len(),
1388 )?;
1389 let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1390 let mut paths = BTreeSet::new();
1391 paths.insert(symbol.path.clone());
1392 for result in &results {
1393 if let Some(callsite) = &result.callsite {
1394 paths.insert(callsite.path.clone());
1395 }
1396 }
1397 let mut coverage = self.graph_coverage(paths)?;
1398 if summary.unresolved > 0 {
1399 coverage.known_index_gaps.push(format!(
1400 "{} unresolved qualified callsites match the requested final segment but are not verified to this symbol",
1401 summary.unresolved
1402 ));
1403 }
1404 Ok(crate::query::graph::GraphTraversalReport {
1405 query: crate::query::graph::GraphTraversalQuery {
1406 tool: tool.to_string(),
1407 symbol_id: Some(symbol.symbol_id),
1408 logical_symbol_id: options.logical_symbol_id,
1409 symbol_path: symbol.qualified_name.clone(),
1410 resolution: options.resolution_mode.as_str().to_string(),
1411 },
1412 logical_symbol,
1413 variants,
1414 summary,
1415 coverage,
1416 results,
1417 })
1418 }
1419
1420 pub fn compare_graph_to_text(
1421 &self,
1422 symbol: &crate::query::symbol::SymbolHit,
1423 pattern: &str,
1424 limit: u32,
1425 options: &crate::query::graph::GraphTraversalOptions,
1426 include_tests: bool,
1427 ) -> anyhow::Result<crate::query::graph::CompareGraphTextReport> {
1428 let regex = Regex::new(pattern)?;
1429 let options = self.graph_options_with_logical_group(options)?;
1430 let mut graph_edges = crate::query::graph::traverse_with_options(
1431 self.storage.connection(),
1432 &symbol.qualified_name,
1433 true,
1434 limit,
1435 &options,
1436 )?;
1437 if !include_tests {
1438 graph_edges.retain(|edge| {
1439 edge.callsite.as_ref().is_none_or(|callsite| !is_test_like_path(&callsite.path))
1440 });
1441 }
1442 let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1443 let text_hits = self.regex_hits(pattern, ®ex, include_tests)?;
1444 let text_by_location = text_hits
1445 .iter()
1446 .map(|hit| ((hit.path.clone(), hit.line), hit))
1447 .collect::<BTreeMap<_, _>>();
1448 let graph_by_location = graph_edges
1449 .iter()
1450 .filter_map(|edge| {
1451 edge.callsite
1452 .as_ref()
1453 .map(|callsite| ((callsite.path.clone(), callsite.line), edge))
1454 })
1455 .collect::<BTreeMap<_, _>>();
1456
1457 let mut paths = BTreeSet::new();
1458 paths.insert(symbol.path.clone());
1459 for hit in &text_hits {
1460 paths.insert(hit.path.clone());
1461 }
1462 for edge in &graph_edges {
1463 if let Some(callsite) = &edge.callsite {
1464 paths.insert(callsite.path.clone());
1465 }
1466 }
1467
1468 let parser_failure_paths = self
1469 .parser_failure_paths()?
1470 .into_iter()
1471 .map(|failure| failure.path)
1472 .collect::<BTreeSet<_>>();
1473 let mut matched_hits = Vec::new();
1474 let mut text_only_hits = Vec::new();
1475 let mut likely_parser_gaps = Vec::new();
1476 for hit in &text_hits {
1477 if let Some(edge) = graph_by_location.get(&(hit.path.clone(), hit.line)) {
1478 matched_hits.push(crate::query::graph::MatchedGraphTextHit {
1479 path: hit.path.clone(),
1480 line: hit.line,
1481 text: hit.text.clone(),
1482 target: edge.target.clone(),
1483 edge_kind: edge.edge_kind.clone(),
1484 confidence: edge.confidence.clone(),
1485 resolution: edge.resolution.clone(),
1486 });
1487 } else {
1488 let gap_kind = classify_text_only_hit(&hit.path, &hit.text, &parser_failure_paths);
1489 let text_only_hit = crate::query::graph::TextOnlyHit {
1490 path: hit.path.clone(),
1491 line: hit.line,
1492 text: hit.text.clone(),
1493 reason: if gap_kind == "parser_call_extraction" || gap_kind == "parser_failure"
1494 {
1495 "no graph edge extracted"
1496 } else {
1497 "text mention outside graph-call evidence"
1498 }
1499 .to_string(),
1500 likely_gap: gap_kind.to_string(),
1501 };
1502 if is_likely_parser_gap_kind(gap_kind) {
1503 likely_parser_gaps.push(text_only_hit.clone());
1504 }
1505 text_only_hits.push(text_only_hit);
1506 }
1507 }
1508
1509 let mut graph_only_edges = Vec::new();
1510 let mut likely_false_positives = Vec::new();
1511 for edge in &graph_edges {
1512 let Some(callsite) = &edge.callsite else {
1513 continue;
1514 };
1515 if text_by_location.contains_key(&(callsite.path.clone(), callsite.line)) {
1516 continue;
1517 }
1518 let current_line = self.current_line_text(&callsite.path, callsite.line)?;
1519 let graph_only = crate::query::graph::GraphOnlyEdge {
1520 path: callsite.path.clone(),
1521 line: callsite.line,
1522 target: edge.target.clone(),
1523 edge_kind: edge.edge_kind.clone(),
1524 confidence: edge.confidence.clone(),
1525 resolution: edge.resolution.clone(),
1526 evidence: edge.evidence.clone(),
1527 reason: "graph edge exists but pattern did not match text".to_string(),
1528 likely_reason: graph_only_reason(edge, current_line.as_deref()),
1529 };
1530 if is_likely_false_positive_graph_only(edge, &graph_only) {
1531 likely_false_positives.push(graph_only.clone());
1532 }
1533 graph_only_edges.push(graph_only);
1534 }
1535 let complete = likely_parser_gaps.is_empty() && likely_false_positives.is_empty();
1536 let recommended_fallback =
1537 recommended_graph_text_fallback(&likely_parser_gaps, &graph_only_edges);
1538 let pattern_match_mode = compare_pattern_match_mode(pattern, &symbol.name);
1539 let mut warnings = Vec::new();
1540 if pattern_match_mode == "substring_identifier" {
1541 warnings.push(format!(
1542 "pattern may match identifiers that merely contain `{}`; use an identifier boundary or escaped call suffix for exact text auditing",
1543 symbol.name
1544 ));
1545 }
1546
1547 Ok(crate::query::graph::CompareGraphTextReport {
1548 query: crate::query::graph::CompareGraphTextQuery {
1549 symbol_id: Some(symbol.symbol_id),
1550 logical_symbol_id: options.logical_symbol_id,
1551 symbol_path: symbol.qualified_name.clone(),
1552 pattern: pattern.to_string(),
1553 resolution: options.resolution_mode.as_str().to_string(),
1554 include_tests,
1555 },
1556 logical_symbol,
1557 variants,
1558 summary: crate::query::graph::CompareGraphTextSummary {
1559 graph_hits: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1560 graph_edges: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1561 text_hits: u64::try_from(text_hits.len()).unwrap_or(u64::MAX),
1562 matched: u64::try_from(matched_hits.len()).unwrap_or(u64::MAX),
1563 graph_only: u64::try_from(graph_only_edges.len()).unwrap_or(u64::MAX),
1564 text_only: u64::try_from(text_only_hits.len()).unwrap_or(u64::MAX),
1565 text_mentions: u64::try_from(text_only_hits.len() - likely_parser_gaps.len())
1566 .unwrap_or(u64::MAX),
1567 likely_parser_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1568 likely_false_positives: u64::try_from(likely_false_positives.len())
1569 .unwrap_or(u64::MAX),
1570 likely_index_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1571 complete,
1572 recommended_fallback,
1573 pattern_match_mode,
1574 warnings,
1575 },
1576 coverage: self.graph_coverage(paths)?,
1577 matched_hits,
1578 text_only_hits,
1579 graph_only_edges,
1580 likely_parser_gaps,
1581 likely_false_positives,
1582 })
1583 }
1584
1585 fn graph_logical_symbol(
1586 &self,
1587 logical_symbol_id: Option<i64>,
1588 ) -> anyhow::Result<(
1589 Option<crate::query::graph::LogicalSymbol>,
1590 Vec<crate::query::graph::LogicalSymbolVariant>,
1591 )> {
1592 let Some(logical_symbol_id) = logical_symbol_id else {
1593 return Ok((None, Vec::new()));
1594 };
1595 let Some(logical) = crate::query::symbol::lookup_logical_by_id(
1596 self.storage.connection(),
1597 logical_symbol_id,
1598 )?
1599 else {
1600 return Ok((None, Vec::new()));
1601 };
1602 let variants = crate::query::symbol::logical_members(
1603 self.storage.connection(),
1604 logical.logical_symbol_id,
1605 )?
1606 .into_iter()
1607 .map(|member| crate::query::graph::LogicalSymbolVariant {
1608 symbol_id: member.symbol_id,
1609 cfg_expr: member.cfg_expr,
1610 signature_hash: member.signature_hash,
1611 start_line: member.start_line,
1612 end_line: member.end_line,
1613 })
1614 .collect::<Vec<_>>();
1615 Ok((
1616 Some(crate::query::graph::LogicalSymbol {
1617 logical_symbol_id: logical.logical_symbol_id,
1618 qualified_name: logical.qualified_name,
1619 variant_count: logical.variant_count,
1620 group_reason: logical.group_reason,
1621 }),
1622 variants,
1623 ))
1624 }
1625
1626 fn graph_options_with_logical_group(
1627 &self,
1628 options: &crate::query::graph::GraphTraversalOptions,
1629 ) -> anyhow::Result<crate::query::graph::GraphTraversalOptions> {
1630 if options.logical_symbol_id.is_some() {
1631 return Ok(options.clone());
1632 }
1633 let Some(symbol_id) = options.symbol_id else {
1634 return Ok(options.clone());
1635 };
1636 let Some(logical) =
1637 crate::query::symbol::logical_for_symbol_id(self.storage.connection(), symbol_id)?
1638 else {
1639 return Ok(options.clone());
1640 };
1641 let mut options = options.clone();
1642 options.logical_symbol_id = Some(logical.logical_symbol_id);
1643 Ok(options)
1644 }
1645
1646 fn local_symbol_context_hits(
1647 &self,
1648 symbol: &crate::query::symbol::SymbolHit,
1649 limit: u32,
1650 ) -> anyhow::Result<Vec<SearchHit>> {
1651 let mut stmt = self.storage.connection().prepare(
1652 "
1653 SELECT chunks.id, files.path, files.language, files.kind,
1654 chunks.start_line, chunks.end_line, chunks.symbol_path, chunks.text
1655 FROM chunks
1656 JOIN files ON files.id = chunks.file_id
1657 WHERE files.path = ?1
1658 AND (
1659 chunks.symbol_path = ?2
1660 OR chunks.symbol_path LIKE ?3
1661 OR chunks.text LIKE ?4
1662 )
1663 ORDER BY
1664 CASE
1665 WHEN chunks.symbol_path = ?2 THEN 0
1666 WHEN chunks.symbol_path LIKE ?3 THEN 1
1667 ELSE 2
1668 END,
1669 chunks.start_line
1670 LIMIT ?5
1671 ",
1672 )?;
1673 let rows = stmt.query_map(
1674 params![
1675 symbol.path,
1676 symbol.qualified_name,
1677 format!("%{}%", symbol.name),
1678 format!("%{}%", symbol.name),
1679 i64::from(limit.max(1)),
1680 ],
1681 |row| {
1682 let text: String = row.get(7)?;
1683 Ok(SearchHit {
1684 chunk_id: row.get(0)?,
1685 path: row.get(1)?,
1686 language: row.get(2)?,
1687 kind: row.get(3)?,
1688 start_line: row.get(4)?,
1689 end_line: row.get(5)?,
1690 symbol_path: row.get(6)?,
1691 score: 1.0,
1692 summary: bounded_summary(&text),
1693 graph: None,
1694 score_components: None,
1695 })
1696 },
1697 )?;
1698 let mut hits = Vec::new();
1699 for row in rows {
1700 hits.push(row?);
1701 }
1702 Ok(hits)
1703 }
1704
1705 pub fn impact_surface(
1706 &self,
1707 query: &str,
1708 limit: u32,
1709 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1710 crate::query::impact::impact_surface(self.storage.connection(), query, limit)
1711 }
1712
1713 pub fn impact_surface_with_options(
1714 &self,
1715 query: &str,
1716 limit: u32,
1717 resolution_mode: crate::query::graph::GraphResolutionMode,
1718 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1719 crate::query::impact::impact_surface_with_options(
1720 self.storage.connection(),
1721 query,
1722 limit,
1723 resolution_mode,
1724 )
1725 }
1726
1727 pub fn impact_surface_for_selected_symbol(
1728 &self,
1729 symbol: &crate::query::symbol::SymbolHit,
1730 limit: u32,
1731 resolution_mode: crate::query::graph::GraphResolutionMode,
1732 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1733 crate::query::impact::impact_surface_for_symbol(
1734 self.storage.connection(),
1735 symbol,
1736 limit,
1737 resolution_mode,
1738 )
1739 }
1740
1741 pub fn impact_surface_report_for_selected_symbol(
1742 &self,
1743 symbol: &crate::query::symbol::SymbolHit,
1744 limit: u32,
1745 options: &crate::query::impact::ImpactSurfaceOptions,
1746 ) -> anyhow::Result<crate::query::impact::ImpactSurfaceReport> {
1747 crate::query::impact::impact_surface_report_for_symbol(
1748 self.storage.connection(),
1749 symbol,
1750 limit,
1751 options,
1752 )
1753 }
1754
1755 pub fn repo_brief(
1756 &self,
1757 options: crate::query::repo_brief::RepoBriefOptions,
1758 ) -> anyhow::Result<crate::query::repo_brief::RepoBrief> {
1759 crate::query::repo_brief::repo_brief(self.storage.connection(), options)
1760 }
1761
1762 pub fn memory_create(
1763 &self,
1764 request: crate::query::memory::RepoMemoryCreate,
1765 ) -> anyhow::Result<crate::query::memory::RepoMemoryCreateResult> {
1766 crate::query::memory::create_memory(self.storage.connection(), request)
1767 }
1768
1769 pub fn memory_update(
1770 &self,
1771 update: crate::query::memory::RepoMemoryUpdate,
1772 ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1773 crate::query::memory::update_memory(self.storage.connection(), update)
1774 }
1775
1776 pub fn memory_mark_obsolete(
1777 &self,
1778 memory_id: &str,
1779 ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1780 crate::query::memory::mark_obsolete(self.storage.connection(), memory_id)
1781 }
1782
1783 pub fn memory_search(
1784 &self,
1785 query: &str,
1786 limit: u32,
1787 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1788 crate::query::memory::memory_search(self.storage.connection(), query, limit)
1789 }
1790
1791 pub fn memory_for_symbol(
1792 &self,
1793 symbol: &crate::query::symbol::SymbolHit,
1794 limit: u32,
1795 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1796 crate::query::memory::memories_for_symbol(self.storage.connection(), symbol, limit)
1797 }
1798
1799 pub fn memory_for_path(
1800 &self,
1801 path: &str,
1802 limit: u32,
1803 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1804 crate::query::memory::memories_for_path(self.storage.connection(), path, limit)
1805 }
1806
1807 pub fn memory_for_edges(
1808 &self,
1809 edge_ids: &[i64],
1810 limit: u32,
1811 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1812 crate::query::memory::memories_for_edges(self.storage.connection(), edge_ids, limit)
1813 }
1814
1815 pub fn memory_evidence_for_symbol_and_edges(
1816 &self,
1817 symbol: &crate::query::symbol::SymbolHit,
1818 edge_ids: &[i64],
1819 limit: u32,
1820 ) -> anyhow::Result<crate::query::memory::RepoMemoryEvidence> {
1821 crate::query::memory::memory_evidence_for_symbol_and_edges(
1822 self.storage.connection(),
1823 symbol,
1824 edge_ids,
1825 limit,
1826 )
1827 }
1828
1829 pub fn memory_for_call_path_hash(
1830 &self,
1831 edge_sequence_hash: &str,
1832 limit: u32,
1833 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1834 crate::query::memory::memories_for_call_path_hash(
1835 self.storage.connection(),
1836 edge_sequence_hash,
1837 limit,
1838 )
1839 }
1840
1841 pub fn memory_validate(
1842 &self,
1843 ) -> anyhow::Result<crate::query::memory::RepoMemoryValidationReport> {
1844 crate::query::memory::validate_memories(self.storage.connection())
1845 }
1846
1847 pub fn rebuild_fts(&self) -> anyhow::Result<()> {
1848 schema::rebuild_fts(self.storage.connection())?;
1849 self.record_content_revision()?;
1850 self.record_fts_current()?;
1851 self.set_meta("fts_dirty", "false")?;
1852 Ok(())
1853 }
1854
1855 pub fn sync_fts(&self) -> anyhow::Result<()> {
1856 self.record_content_revision()?;
1857 self.record_fts_current()?;
1858 self.set_meta("fts_dirty", "false")?;
1859 Ok(())
1860 }
1861
1862 fn record_fts_current(&self) -> anyhow::Result<()> {
1863 self.set_meta("fts_synced_at_ms", &now_ms().to_string())?;
1864 let revision = self.content_revision()?;
1865 self.set_meta("fts_source_revision", &revision)?;
1866 Ok(())
1867 }
1868
1869 fn record_content_revision(&self) -> anyhow::Result<String> {
1870 let revision = self.content_revision()?;
1871 self.set_meta("content_revision", &revision)?;
1872 Ok(revision)
1873 }
1874
1875 pub fn heal_file(&self, path: &Path) -> anyhow::Result<()> {
1876 let Some(root) = self.storage.source_root() else {
1877 anyhow::bail!("index has no source_root metadata; rebuild required");
1878 };
1879 let row = self.file_row(path)?;
1880 let full_path = root.join(path);
1881 let text = fs::read_to_string(&full_path)?;
1882
1883 let changes = git_changed_paths(root).unwrap_or_default();
1884 let is_dirty = changes.changed.contains(path);
1885 let has_base_commit = !self.active_commit_sha.is_empty();
1886 let scope = if !has_base_commit || is_dirty {
1887 FileScope::worktree(self.active_worktree_id.clone())
1888 } else {
1889 FileScope::commit(self.active_commit_sha.clone())
1890 };
1891 self.remove_file_in_scope(path, &scope.commit_sha, &scope.worktree_id)?;
1892
1893 self.index_file(
1894 path,
1895 row.language,
1896 row.kind,
1897 file_metadata_ms(&full_path)?,
1898 &text,
1899 &scope,
1900 )?;
1901 self.rebuild_logical_symbols()?;
1902 self.resolve_edges()
1903 }
1904
1905 fn index_file(
1906 &self,
1907 path: &Path,
1908 language: Language,
1909 kind: TargetKind,
1910 modified_at_ms: i64,
1911 text: &str,
1912 scope: &FileScope,
1913 ) -> anyhow::Result<()> {
1914 if language != Language::Markdown && kind != TargetKind::Generated {
1915 if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
1916 } else if let Some(message) = parser::parse_error(path, language, text)
1919 .unwrap_or_else(|err| Some(err.to_string()))
1920 {
1921 self.insert_parser_failure(path, language, &message)?;
1922 }
1923 }
1924 let sha256 = hex_sha256(text.as_bytes());
1925 let file_id = self.storage.connection().query_row(
1926 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
1927 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
1928 RETURNING id",
1929 params![
1930 path_string(path),
1931 language.as_str(),
1932 kind.as_str(),
1933 sha256,
1934 modified_at_ms,
1935 matches!(kind, TargetKind::Generated),
1936 now_ms(),
1937 sha256,
1938 &scope.commit_sha,
1939 &scope.worktree_id,
1940 ],
1941 |row| row.get::<_, i64>(0),
1942 )?;
1943 let chunks = if kind == TargetKind::Generated {
1944 chunker::generated_chunks_for_file(path, text)
1945 } else {
1946 chunker::chunks_for_file(path, language, text)
1947 };
1948 let symbols =
1949 if kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
1950 Vec::new()
1951 } else {
1952 symbols::symbols_for_file(path, language, text)
1953 };
1954 self.insert_chunks(file_id, &sha256, &chunks, text)?;
1955 self.insert_symbols(file_id, language, &symbols)?;
1956 if kind != TargetKind::Generated && text.len() <= edges::MAX_GRAPH_PARSE_BYTES {
1957 edges::index_file_edges(self.storage.connection(), file_id, path, language, text)?;
1958 }
1959 self.mark_fts_dirty()?;
1960 Ok(())
1961 }
1962
1963 fn insert_prepared_file(&self, prepared_file: &PreparedIndexFile) -> anyhow::Result<()> {
1964 let file = &prepared_file.file;
1965 let prepared = match &prepared_file.prepared {
1966 Ok(prepared) => prepared,
1967 Err(err) => {
1968 self.insert_parser_failure(&file.relative_path, file.language, &err.to_string())?;
1969 return Ok(());
1970 },
1971 };
1972 if let Some(message) = &prepared.parser_failure {
1973 self.insert_parser_failure(&file.relative_path, file.language, message)?;
1974 }
1975 let file_id = self.storage.connection().query_row(
1976 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
1977 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
1978 RETURNING id",
1979 params![
1980 path_string(&file.relative_path),
1981 file.language.as_str(),
1982 file.kind.as_str(),
1983 prepared.sha256,
1984 prepared.modified_at_ms,
1985 matches!(file.kind, TargetKind::Generated),
1986 now_ms(),
1987 prepared.sha256,
1988 file.commit_sha,
1989 file.worktree_id,
1990 ],
1991 |row| row.get::<_, i64>(0),
1992 )?;
1993 self.insert_chunks(file_id, &prepared.sha256, &prepared.chunks, &prepared.text)?;
1994 self.insert_symbols(file_id, file.language, &prepared.symbols)?;
1995 if file.kind != TargetKind::Generated && prepared.text.len() <= edges::MAX_GRAPH_PARSE_BYTES
1996 {
1997 edges::index_file_edges(
1998 self.storage.connection(),
1999 file_id,
2000 &file.relative_path,
2001 file.language,
2002 &prepared.text,
2003 )?;
2004 }
2005 self.mark_fts_dirty()?;
2006 Ok(())
2007 }
2008
2009 fn insert_chunks(
2010 &self,
2011 file_id: i64,
2012 source_revision: &str,
2013 chunks: &[Chunk],
2014 full_text: &str,
2015 ) -> anyhow::Result<()> {
2016 let (path, language, kind) = self.storage.connection().query_row(
2017 "SELECT path, language, kind FROM main.files WHERE id = ?1",
2018 [file_id],
2019 |row| {
2020 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?))
2021 },
2022 )?;
2023 for chunk in chunks {
2024 let anchor =
2025 anchors::anchor_for_text(&chunk.text, chunk.start_line, chunk.end_line, full_text);
2026 let embedding_policy = ai::embedding_policy_for_chunk(
2027 Path::new(&path),
2028 &language,
2029 &kind,
2030 chunk.kind,
2031 chunk.symbol_path.as_deref(),
2032 &chunk.text,
2033 ai::DEFAULT_MAX_EMBEDDING_CHARS,
2034 );
2035 self.storage.connection().execute(
2036 "INSERT INTO chunks(file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line, text, text_hash,
2037 source_revision, anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2038 start_context_hash, end_context_hash, context_radius, embedding_policy, embedding_priority)
2039 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19)",
2040 params![
2041 file_id,
2042 chunk.kind,
2043 chunk.symbol_path,
2044 i64::try_from(chunk.start_byte)?,
2045 i64::try_from(chunk.end_byte)?,
2046 i64::try_from(chunk.start_line)?,
2047 i64::try_from(chunk.end_line)?,
2048 chunk.text,
2049 hex_sha256(chunk.text.as_bytes()),
2050 source_revision,
2051 anchor.version,
2052 anchor.normalized_hash,
2053 anchor.start_boundary_hash,
2054 anchor.end_boundary_hash,
2055 anchor.start_context_hash,
2056 anchor.end_context_hash,
2057 anchor.context_radius,
2058 embedding_policy.policy,
2059 embedding_policy.priority,
2060 ],
2061 )?;
2062 let chunk_id = self.storage.connection().last_insert_rowid();
2063 self.storage.connection().execute(
2064 "INSERT INTO chunk_fts(rowid, text) VALUES (?1, ?2)",
2065 params![chunk_id, chunk.text],
2066 )?;
2067 }
2068 Ok(())
2069 }
2070
2071 fn insert_symbols(
2072 &self,
2073 file_id: i64,
2074 language: Language,
2075 symbols: &[Symbol],
2076 ) -> anyhow::Result<()> {
2077 for symbol in symbols {
2078 self.storage.connection().execute(
2079 "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs)
2080 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
2081 params![
2082 file_id,
2083 language.as_str(),
2084 symbol.name,
2085 symbol.qualified_name,
2086 symbol.kind,
2087 i64::try_from(symbol.start_byte)?,
2088 i64::try_from(symbol.end_byte)?,
2089 symbol.signature,
2090 symbol.docs,
2091 ],
2092 )?;
2093 let symbol_id = self.storage.connection().last_insert_rowid();
2094 for fact in &symbol.facts {
2095 self.storage.connection().execute(
2096 "INSERT OR IGNORE INTO symbol_facts(symbol_id, fact_kind, fact_value)
2097 VALUES (?1, ?2, ?3)",
2098 params![symbol_id, fact.kind, fact.value],
2099 )?;
2100 }
2101 }
2102 Ok(())
2103 }
2104
2105 fn write_git_meta(&self, root: &Path) -> anyhow::Result<()> {
2106 self.set_meta("git_commit", &git_output(root, &["rev-parse", "HEAD"]).unwrap_or_default())?;
2107 let dirty = !git_output(root, &["status", "--porcelain"]).unwrap_or_default().is_empty();
2108 self.set_meta("git_dirty", if dirty { "true" } else { "false" })?;
2109 Ok(())
2110 }
2111
2112 fn apply_prepared_git_history(
2113 &self,
2114 root: &Path,
2115 handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
2116 ) -> anyhow::Result<GitHistoryIndexStatus> {
2117 let prepared = join_git_history_prepare(handle)?;
2118 git_history::apply_prepared(self.storage.connection(), root, prepared)
2119 }
2120
2121 fn git_history_status(&self) -> anyhow::Result<GitHistoryIndexStatus> {
2122 let Some(root) = self.storage.source_root() else {
2123 return git_history::status(self.storage.connection(), Path::new("."));
2124 };
2125 git_history::status(self.storage.connection(), root)
2126 }
2127
2128 fn github_status(&self) -> anyhow::Result<GitHubStatus> {
2129 github::status(self.storage.connection())
2130 }
2131
2132 fn mark_fts_dirty(&self) -> anyhow::Result<()> {
2133 self.set_meta("fts_dirty", "true")
2134 }
2135
2136 fn resolve_edges(&self) -> anyhow::Result<()> {
2137 edges::resolve_all_edges(self.storage.connection())
2138 }
2139
2140 fn rebuild_logical_symbols(&self) -> anyhow::Result<()> {
2141 self.storage.connection().execute_batch(
2142 "
2143 CREATE TEMP TABLE IF NOT EXISTS logical_symbols_to_rebuild(id INTEGER PRIMARY KEY);
2144 DELETE FROM temp.logical_symbols_to_rebuild;
2145 INSERT OR IGNORE INTO temp.logical_symbols_to_rebuild(id)
2146 SELECT logical_symbol_members.logical_symbol_id
2147 FROM main.logical_symbol_members
2148 JOIN main.symbols ON symbols.id = logical_symbol_members.symbol_id
2149 JOIN files ON files.id = symbols.file_id;
2150 DELETE FROM main.logical_symbol_members
2151 WHERE logical_symbol_id IN (
2152 SELECT id FROM temp.logical_symbols_to_rebuild
2153 );
2154 DELETE FROM main.logical_symbols
2155 WHERE id IN (
2156 SELECT id FROM temp.logical_symbols_to_rebuild
2157 );
2158 DELETE FROM temp.logical_symbols_to_rebuild;
2159 ",
2160 )?;
2161
2162 let mut stmt = self.storage.connection().prepare(
2163 "
2164 SELECT symbols.id, symbols.file_id, files.path, symbols.language, symbols.name,
2165 symbols.qualified_name, symbols.kind, symbols.start_byte, symbols.end_byte,
2166 symbols.signature,
2167 COALESCE((
2168 SELECT chunks.start_byte
2169 FROM chunks
2170 WHERE chunks.file_id = symbols.file_id
2171 AND symbols.start_byte >= chunks.start_byte
2172 AND symbols.start_byte < chunks.end_byte
2173 ORDER BY chunks.end_byte - chunks.start_byte ASC
2174 LIMIT 1
2175 ), symbols.start_byte) AS chunk_start_byte,
2176 COALESCE((
2177 SELECT chunks.start_line
2178 FROM chunks
2179 WHERE chunks.file_id = symbols.file_id
2180 AND symbols.start_byte >= chunks.start_byte
2181 AND symbols.start_byte < chunks.end_byte
2182 ORDER BY chunks.end_byte - chunks.start_byte ASC
2183 LIMIT 1
2184 ), 1) AS chunk_start_line,
2185 COALESCE((
2186 SELECT chunks.text
2187 FROM chunks
2188 WHERE chunks.file_id = symbols.file_id
2189 AND symbols.start_byte >= chunks.start_byte
2190 AND symbols.start_byte < chunks.end_byte
2191 ORDER BY chunks.end_byte - chunks.start_byte ASC
2192 LIMIT 1
2193 ), '') AS chunk_text
2194 FROM symbols
2195 JOIN files ON files.id = symbols.file_id
2196 ORDER BY files.path, symbols.language, symbols.qualified_name, symbols.kind,
2197 symbols.start_byte, symbols.end_byte
2198 ",
2199 )?;
2200 let rows = stmt.query_map([], |row| {
2201 let start_byte = usize::try_from(row.get::<_, i64>(7)?).unwrap_or(0);
2202 let end_byte = usize::try_from(row.get::<_, i64>(8)?).unwrap_or(0);
2203 let chunk_start_byte = usize::try_from(row.get::<_, i64>(10)?).unwrap_or(start_byte);
2204 let chunk_start_line = row.get::<_, i64>(11)?;
2205 let chunk_text: String = row.get(12)?;
2206 let start_line =
2207 symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, start_byte);
2208 let end_line =
2209 symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, end_byte);
2210 Ok(LogicalSymbolMemberRow {
2211 symbol_id: row.get(0)?,
2212 path: row.get(2)?,
2213 language: row.get(3)?,
2214 name: row.get(4)?,
2215 qualified_name: row.get(5)?,
2216 kind: row.get(6)?,
2217 signature: row.get(9)?,
2218 start_line,
2219 end_line,
2220 })
2221 })?;
2222 let mut groups: BTreeMap<LogicalSymbolKey, Vec<LogicalSymbolMemberRow>> = BTreeMap::new();
2223 for row in rows {
2224 let row = row?;
2225 groups.entry(LogicalSymbolKey::from(&row)).or_default().push(row);
2226 }
2227 for (key, members) in groups {
2228 let group_reason = if members.len() > 1 { "cfg_variant" } else { "single" };
2229 self.storage.connection().execute(
2230 "
2231 INSERT INTO logical_symbols(language, path, logical_name, qualified_name, kind, variant_count, group_reason)
2232 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)
2233 ",
2234 params![
2235 key.language,
2236 key.path,
2237 key.name,
2238 key.qualified_name,
2239 key.kind,
2240 i64::try_from(members.len()).unwrap_or(i64::MAX),
2241 group_reason,
2242 ],
2243 )?;
2244 let logical_symbol_id = self.storage.connection().last_insert_rowid();
2245 for member in members {
2246 let signature_hash =
2247 member.signature.as_deref().map(|signature| hex_sha256(signature.as_bytes()));
2248 self.storage.connection().execute(
2249 "
2250 INSERT INTO logical_symbol_members(
2251 logical_symbol_id, symbol_id, cfg_expr, signature_hash, start_line, end_line
2252 )
2253 VALUES (?1, ?2, NULL, ?3, ?4, ?5)
2254 ",
2255 params![
2256 logical_symbol_id,
2257 member.symbol_id,
2258 signature_hash,
2259 member.start_line,
2260 member.end_line,
2261 ],
2262 )?;
2263 }
2264 }
2265 Ok(())
2266 }
2267
2268 fn graph_coverage(
2269 &self,
2270 paths: BTreeSet<String>,
2271 ) -> anyhow::Result<crate::query::graph::GraphCoverage> {
2272 let indexed_files =
2273 self.storage
2274 .connection()
2275 .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2276 let parser_failure_paths = self.parser_failure_paths()?;
2277 let parser_failures = u64::try_from(parser_failure_paths.len()).unwrap_or(0);
2278 let known_index_gaps = parser_failure_paths
2279 .iter()
2280 .map(|failure| {
2281 format!(
2282 "{} parser failed for {}: {}",
2283 failure.language, failure.path, failure.message
2284 )
2285 })
2286 .collect::<Vec<_>>();
2287 let mut stale_files = 0_u64;
2288 let mut parser_coverage_for_paths = Vec::new();
2289 for path in paths {
2290 let Some(row) = self.graph_path_row(&path)? else {
2291 parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2292 path,
2293 language: "unknown".to_string(),
2294 parser_status: "missing_from_index".to_string(),
2295 graph_status: "missing_from_index".to_string(),
2296 last_indexed_revision: None,
2297 });
2298 continue;
2299 };
2300 let stale = self.source_path_is_stale(&path, &row.sha256);
2301 if stale {
2302 stale_files += 1;
2303 }
2304 let parser_failed = parser_failure_paths.iter().any(|failure| failure.path == path);
2305 parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2306 path,
2307 language: row.language,
2308 parser_status: if parser_failed { "failed" } else { "ok" }.to_string(),
2309 graph_status: if stale {
2310 "stale_source"
2311 } else if parser_failed {
2312 "parser_failed"
2313 } else {
2314 "ok"
2315 }
2316 .to_string(),
2317 last_indexed_revision: (!row.indexed_revision.is_empty())
2318 .then_some(row.indexed_revision),
2319 });
2320 }
2321 Ok(crate::query::graph::GraphCoverage {
2322 indexed_files: u64::try_from(indexed_files).unwrap_or(0),
2323 parser_failures,
2324 stale_files,
2325 known_index_gaps,
2326 parser_coverage_for_paths,
2327 })
2328 }
2329
2330 fn graph_path_row(&self, path: &str) -> anyhow::Result<Option<GraphPathRow>> {
2331 self.storage
2332 .connection()
2333 .query_row(
2334 "SELECT language, sha256, indexed_revision FROM files WHERE path = ?1",
2335 [path],
2336 |row| {
2337 Ok(GraphPathRow {
2338 language: row.get(0)?,
2339 sha256: row.get(1)?,
2340 indexed_revision: row.get(2)?,
2341 })
2342 },
2343 )
2344 .optional()
2345 .map_err(Into::into)
2346 }
2347
2348 fn source_path_is_stale(&self, path: &str, indexed_sha256: &str) -> bool {
2349 let Some(root) = self.storage.source_root() else {
2350 return false;
2351 };
2352 let Ok(bytes) = fs::read(root.join(path)) else {
2353 return true;
2354 };
2355 hex_sha256(&bytes) != indexed_sha256
2356 }
2357
2358 fn regex_hits(
2359 &self,
2360 pattern: &str,
2361 regex: &Regex,
2362 include_tests: bool,
2363 ) -> anyhow::Result<Vec<crate::query::graph::TextOnlyHit>> {
2364 let Some(root) = self.storage.source_root() else {
2365 anyhow::bail!("cannot compare graph to text: source_root is missing from index_meta");
2366 };
2367 let mut stmt = self.storage.connection().prepare("SELECT path FROM files ORDER BY path")?;
2368 let paths =
2369 stmt.query_map([], |row| row.get::<_, String>(0))?.collect::<Result<Vec<_>, _>>()?;
2370 let mut hits = Vec::new();
2371 for path in paths {
2372 if !include_tests && is_test_like_path(&path) {
2373 continue;
2374 }
2375 let full_path = root.join(&path);
2376 let Ok(text) = fs::read_to_string(&full_path) else {
2377 continue;
2378 };
2379 for (index, line) in text.lines().enumerate() {
2380 if regex.is_match(line) {
2381 hits.push(crate::query::graph::TextOnlyHit {
2382 path: path.clone(),
2383 line: i64::try_from(index + 1).unwrap_or(i64::MAX),
2384 text: line.trim().to_string(),
2385 reason: "text pattern matched".to_string(),
2386 likely_gap: pattern.to_string(),
2387 });
2388 }
2389 }
2390 }
2391 Ok(hits)
2392 }
2393
2394 fn current_line_text(&self, path: &str, line: i64) -> anyhow::Result<Option<String>> {
2395 let Some(root) = self.storage.source_root() else {
2396 return Ok(None);
2397 };
2398 let Ok(text) = fs::read_to_string(root.join(path)) else {
2399 return Ok(None);
2400 };
2401 let Some(index) = usize::try_from(line.saturating_sub(1)).ok() else {
2402 return Ok(None);
2403 };
2404 Ok(text.lines().nth(index).map(|line| line.trim().to_string()))
2405 }
2406
2407 fn ensure_graph_index_current(&self) -> anyhow::Result<()> {
2408 if self.meta("graph_index_version")?.as_deref() == Some(GRAPH_INDEX_VERSION) {
2409 return Ok(());
2410 }
2411 let Some(root) = self.storage.source_root().map(Path::to_path_buf) else {
2412 return Ok(());
2413 };
2414 self.storage.execute_batch("BEGIN IMMEDIATE TRANSACTION")?;
2415 let result = (|| -> anyhow::Result<()> {
2416 self.storage.connection().execute("DELETE FROM edges", [])?;
2417 let files = self.graph_reindex_files()?;
2418 for file in files {
2419 if file.kind == TargetKind::Generated || file.language == Language::Markdown {
2420 continue;
2421 }
2422 let full_path = root.join(&file.path);
2423 let Ok(text) = fs::read_to_string(full_path) else {
2424 continue;
2425 };
2426 if text.len() > edges::MAX_GRAPH_PARSE_BYTES {
2427 continue;
2428 }
2429 edges::index_file_edges(
2430 self.storage.connection(),
2431 file.id,
2432 Path::new(&file.path),
2433 file.language,
2434 &text,
2435 )?;
2436 }
2437 self.resolve_edges()?;
2438 self.mark_graph_index_current()?;
2439 Ok(())
2440 })();
2441 if result.is_err() {
2442 let _ = self.storage.execute_batch("ROLLBACK");
2443 }
2444 result?;
2445 self.storage.execute_batch("COMMIT")?;
2446 Ok(())
2447 }
2448
2449 fn mark_graph_index_current(&self) -> anyhow::Result<()> {
2450 self.set_meta("graph_index_version", GRAPH_INDEX_VERSION)
2451 }
2452
2453 fn set_meta(&self, key: &str, value: &str) -> anyhow::Result<()> {
2454 self.storage.connection().execute(
2455 "INSERT INTO index_meta(key, value) VALUES (?1, ?2)
2456 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
2457 params![key, value],
2458 )?;
2459 Ok(())
2460 }
2461
2462 fn meta(&self, key: &str) -> anyhow::Result<Option<String>> {
2463 meta_for(self.storage.connection(), key)
2464 }
2465
2466 fn insert_parser_failure(
2467 &self,
2468 path: &Path,
2469 language: Language,
2470 message: &str,
2471 ) -> anyhow::Result<()> {
2472 self.storage.connection().execute(
2473 "INSERT INTO parser_failures(path, language, message) VALUES (?1, ?2, ?3)",
2474 params![path_string(path), language.as_str(), message],
2475 )?;
2476 Ok(())
2477 }
2478
2479 fn parser_failure_count(&self) -> anyhow::Result<u64> {
2480 let count = self.storage.connection().query_row(
2481 "SELECT COUNT(*) FROM parser_failures",
2482 [],
2483 |row| row.get::<_, i64>(0),
2484 )?;
2485 Ok(u64::try_from(count).unwrap_or(0))
2486 }
2487
2488 fn parser_failure_paths(&self) -> anyhow::Result<Vec<ParserFailure>> {
2489 let mut stmt = self.storage.connection().prepare(
2490 "SELECT path, language, message FROM parser_failures ORDER BY path, language, message",
2491 )?;
2492 let rows = stmt.query_map([], |row| {
2493 Ok(ParserFailure { path: row.get(0)?, language: row.get(1)?, message: row.get(2)? })
2494 })?;
2495 let mut failures = Vec::new();
2496 for row in rows {
2497 failures.push(row?);
2498 }
2499 Ok(failures)
2500 }
2501
2502 fn search_with_heal(
2503 &self,
2504 query: &str,
2505 limit: u32,
2506 include_generated: bool,
2507 allow_heal: bool,
2508 explain: bool,
2509 options: SearchOptions,
2510 ) -> anyhow::Result<Vec<SearchHit>> {
2511 let hits = crate::search::lexical::search_with_options(
2512 self.storage.connection(),
2513 query,
2514 limit,
2515 include_generated,
2516 explain,
2517 options,
2518 )?;
2519 if !allow_heal {
2520 return Ok(hits);
2521 }
2522 let stale = self.stale_hit_paths(&hits)?;
2523 if stale.is_empty() {
2524 return Ok(hits);
2525 }
2526 if stale.len() > MAX_AUTO_HEAL_FILES_PER_CALL {
2527 anyhow::bail!(IndexError::NeedsReindex {
2528 stale_files: stale.len(),
2529 cap: MAX_AUTO_HEAL_FILES_PER_CALL,
2530 });
2531 }
2532 for path in stale {
2533 self.heal_file(Path::new(&path))?;
2534 }
2535 self.sync_fts()?;
2536 self.search_with_heal(query, limit, include_generated, false, explain, options)
2537 }
2538
2539 fn stale_hit_paths(&self, hits: &[SearchHit]) -> anyhow::Result<Vec<String>> {
2540 let Some(root) = self.storage.source_root() else {
2541 return Ok(Vec::new());
2542 };
2543 let mut stale = Vec::new();
2544 let mut seen = BTreeSet::new();
2545 for hit in hits {
2546 if !seen.insert(hit.path.clone()) {
2547 continue;
2548 }
2549 let source_path = root.join(&hit.path);
2550 let Ok(text) = fs::read_to_string(source_path) else {
2551 stale.push(hit.path.clone());
2552 continue;
2553 };
2554 let chunk = crate::query::read_chunk(self.storage.connection(), hit.chunk_id)?;
2555 let Some(chunk) = chunk else {
2556 stale.push(hit.path.clone());
2557 continue;
2558 };
2559 let anchor = self.chunk_anchor(hit.chunk_id)?;
2560 let status = anchors::validate(
2561 &chunk.text,
2562 usize::try_from(chunk.start_line).unwrap_or(1),
2563 usize::try_from(chunk.end_line).unwrap_or(1),
2564 &anchor,
2565 &text,
2566 );
2567 if !matches!(status, AnchorStatus::Exact) {
2568 stale.push(hit.path.clone());
2569 }
2570 }
2571 Ok(stale)
2572 }
2573
2574 fn chunk_anchor(&self, chunk_id: i64) -> anyhow::Result<ChunkAnchor> {
2575 Ok(self.storage.connection().query_row(
2576 "
2577 SELECT anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2578 start_context_hash, end_context_hash, context_radius
2579 FROM chunks WHERE id = ?1
2580 ",
2581 [chunk_id],
2582 |row| {
2583 Ok(ChunkAnchor {
2584 version: row.get(0)?,
2585 normalized_hash: row.get(1)?,
2586 start_boundary_hash: row.get(2)?,
2587 end_boundary_hash: row.get(3)?,
2588 start_context_hash: row.get(4)?,
2589 end_context_hash: row.get(5)?,
2590 context_radius: row.get(6)?,
2591 })
2592 },
2593 )?)
2594 }
2595
2596 fn mark_file_deleted(&self, path: &Path) -> anyhow::Result<()> {
2597 let path = path_string(path);
2598 self.remove_file_in_scope(Path::new(&path), "", &self.active_worktree_id)?;
2599 self.storage.connection().execute(
2600 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2601 VALUES (?1, 'unknown', 'deleted', '', 0, 0, ?2, '', '', ?3)
2602 ON CONFLICT(path, commit_sha, worktree_id) DO UPDATE SET
2603 kind = 'deleted',
2604 sha256 = '',
2605 modified_at_ms = 0,
2606 indexed_at_ms = excluded.indexed_at_ms",
2607 params![path, now_ms(), self.active_worktree_id],
2608 )?;
2609 self.mark_fts_dirty()?;
2610 Ok(())
2611 }
2612
2613 fn remove_file_in_scope(
2614 &self,
2615 path: &Path,
2616 commit_sha: &str,
2617 worktree_id: &str,
2618 ) -> anyhow::Result<()> {
2619 let path = path_string(path);
2620 self.storage.connection().execute(
2621 "UPDATE edges
2622 SET to_symbol_id = NULL,
2623 confidence = 'NameOnly'
2624 WHERE to_symbol_id IN (
2625 SELECT symbols.id FROM symbols
2626 JOIN main.files ON main.files.id = symbols.file_id
2627 WHERE main.files.path = ?1
2628 AND main.files.commit_sha = ?2
2629 AND main.files.worktree_id = ?3
2630 )",
2631 params![path, commit_sha, worktree_id],
2632 )?;
2633 self.storage.connection().execute(
2634 "DELETE FROM edges
2635 WHERE source_file_id IN (
2636 SELECT id FROM main.files
2637 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2638 )
2639 OR from_symbol_id IN (
2640 SELECT symbols.id FROM symbols
2641 JOIN main.files ON main.files.id = symbols.file_id
2642 WHERE main.files.path = ?1
2643 AND main.files.commit_sha = ?2
2644 AND main.files.worktree_id = ?3
2645 )",
2646 params![path, commit_sha, worktree_id],
2647 )?;
2648 self.storage
2649 .connection()
2650 .execute("DELETE FROM parser_failures WHERE path = ?1", [&path])?;
2651 self.storage.connection().execute(
2652 "DELETE FROM chunk_fts
2653 WHERE rowid IN (
2654 SELECT chunks.id FROM chunks
2655 JOIN main.files ON main.files.id = chunks.file_id
2656 WHERE main.files.path = ?1
2657 AND main.files.commit_sha = ?2
2658 AND main.files.worktree_id = ?3
2659 )",
2660 params![path, commit_sha, worktree_id],
2661 )?;
2662 self.storage.connection().execute(
2663 "DELETE FROM chunks
2664 WHERE file_id IN (
2665 SELECT id FROM main.files
2666 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2667 )",
2668 params![path, commit_sha, worktree_id],
2669 )?;
2670 self.storage.connection().execute(
2671 "DELETE FROM symbols
2672 WHERE file_id IN (
2673 SELECT id FROM main.files
2674 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2675 )",
2676 params![path, commit_sha, worktree_id],
2677 )?;
2678 self.storage.connection().execute(
2679 "DELETE FROM main.files WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3",
2680 params![path, commit_sha, worktree_id],
2681 )?;
2682 self.mark_fts_dirty()?;
2683 Ok(())
2684 }
2685
2686 fn ensure_fts_fresh(&self) -> anyhow::Result<()> {
2687 let content_revision = self.content_revision()?;
2688 let fts_source_revision = self.meta("fts_source_revision")?;
2689 if !self.fts_dirty()? && fts_source_revision.as_deref() == Some(content_revision.as_str()) {
2690 return Ok(());
2691 }
2692 self.rebuild_fts()?;
2693 let refreshed_revision = self.meta("fts_source_revision")?;
2694 if refreshed_revision.as_deref() != Some(content_revision.as_str()) {
2695 anyhow::bail!(
2696 "FTS freshness invariant failed: content_revision={content_revision}, fts_source_revision={}",
2697 refreshed_revision.unwrap_or_else(|| "<missing>".to_string())
2698 );
2699 }
2700 Ok(())
2701 }
2702
2703 fn fts_dirty(&self) -> anyhow::Result<bool> {
2704 Ok(self.meta("fts_dirty")?.as_deref() == Some("true"))
2705 }
2706
2707 fn file_row(&self, path: &Path) -> anyhow::Result<FileRow> {
2708 self.storage
2709 .connection()
2710 .query_row(
2711 "SELECT language, kind FROM files WHERE path = ?1",
2712 [path_string(path)],
2713 |row| {
2714 let language: String = row.get(0)?;
2715 let kind: String = row.get(1)?;
2716 Ok((language, kind))
2717 },
2718 )
2719 .map_err(Into::into)
2720 .and_then(|(language, kind)| {
2721 Ok(FileRow { language: language.parse()?, kind: kind.parse()? })
2722 })
2723 }
2724
2725 fn graph_reindex_files(&self) -> anyhow::Result<Vec<GraphReindexFile>> {
2726 let mut stmt = self
2727 .storage
2728 .connection()
2729 .prepare("SELECT id, path, language, kind FROM files ORDER BY path")?;
2730 let rows = stmt.query_map([], |row| {
2731 let language: String = row.get(2)?;
2732 let kind: String = row.get(3)?;
2733 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?, language, kind))
2734 })?;
2735 let mut files = Vec::new();
2736 for row in rows {
2737 let (id, path, language, kind) = row?;
2738 files.push(GraphReindexFile {
2739 id,
2740 path,
2741 language: language.parse()?,
2742 kind: kind.parse()?,
2743 });
2744 }
2745 Ok(files)
2746 }
2747
2748 fn indexed_files(&self) -> anyhow::Result<Vec<IndexedFile>> {
2749 let mut stmt =
2750 self.storage.connection().prepare("SELECT path, sha256 FROM files ORDER BY path")?;
2751 let rows =
2752 stmt.query_map([], |row| Ok(IndexedFile { path: row.get(0)?, sha256: row.get(1)? }))?;
2753 let mut files = Vec::new();
2754 for row in rows {
2755 files.push(row?);
2756 }
2757 Ok(files)
2758 }
2759
2760 fn indexed_file_count(&self) -> anyhow::Result<usize> {
2761 let count =
2762 self.storage
2763 .connection()
2764 .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2765 Ok(usize::try_from(count).unwrap_or(usize::MAX))
2766 }
2767
2768 fn content_revision(&self) -> anyhow::Result<String> {
2769 let value = self.storage.connection().query_row(
2770 "SELECT COALESCE(string_agg(path || ':' || sha256, ',' ORDER BY path), '') FROM files",
2771 [],
2772 |row| row.get::<_, String>(0),
2773 )?;
2774 Ok(hex_sha256(value.as_bytes()))
2775 }
2776}
2777
2778#[derive(Debug)]
2779struct FileRow {
2780 language: Language,
2781 kind: TargetKind,
2782}
2783
2784#[derive(Debug)]
2785struct GraphReindexFile {
2786 id: i64,
2787 path: String,
2788 language: Language,
2789 kind: TargetKind,
2790}
2791
/// Columns of one `files` row that graph coverage reporting needs.
#[derive(Debug)]
struct GraphPathRow {
    /// Stored `language` column, kept as text.
    language: String,
    /// Stored content hash, compared against the hash of the file on disk.
    sha256: String,
    /// Stored `indexed_revision` column; an empty string is reported as "no revision".
    indexed_revision: String,
}
2798
2799fn rank_docs_for_symbol(symbol: &crate::query::symbol::SymbolHit, hits: &mut [SearchHit]) {
2800 let source_module = module_stem(&symbol.path);
2801 let symbol_name = symbol.name.to_ascii_lowercase();
2802 let qualified_name = symbol.qualified_name.to_ascii_lowercase();
2803 hits.sort_by(|a, b| {
2804 let a_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, a);
2805 let b_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, b);
2806 a_rank
2807 .cmp(&b_rank)
2808 .then_with(|| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal))
2809 .then_with(|| a.path.cmp(&b.path))
2810 .then_with(|| a.start_line.cmp(&b.start_line))
2811 });
2812 for (idx, hit) in hits.iter_mut().enumerate() {
2813 hit.score = (10_000usize.saturating_sub(idx)) as f64;
2814 }
2815}
2816
2817fn docs_locality_rank(
2818 symbol: &crate::query::symbol::SymbolHit,
2819 source_module: &str,
2820 symbol_name: &str,
2821 qualified_name: &str,
2822 hit: &SearchHit,
2823) -> u8 {
2824 let path = hit.path.to_ascii_lowercase();
2825 let summary = hit.summary.to_ascii_lowercase();
2826 let hit_symbol = hit.symbol_path.as_deref().unwrap_or_default().to_ascii_lowercase();
2827 if hit.path == symbol.path && hit_symbol == symbol.qualified_name.to_ascii_lowercase() {
2828 return 0;
2829 }
2830 if hit.path == symbol.path {
2831 return 1;
2832 }
2833 if !source_module.is_empty()
2834 && path.contains(source_module)
2835 && (summary.contains(symbol_name) || hit_symbol.contains(symbol_name))
2836 {
2837 return 2;
2838 }
2839 if summary.contains(qualified_name) || hit_symbol.contains(qualified_name) {
2840 return 3;
2841 }
2842 if summary.contains(symbol_name) || hit_symbol.contains(symbol_name) {
2843 return 4;
2844 }
2845 if !source_module.is_empty() && path.contains(source_module) {
2846 return 5;
2847 }
2848 9
2849}
2850
/// Returns the ASCII-lowercased file stem of `path`, or an empty string when the path
/// has no stem or the stem is not valid UTF-8.
fn module_stem(path: &str) -> String {
    match Path::new(path).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_ascii_lowercase(),
        None => String::new(),
    }
}
2858
2859fn dedupe_search_hits(hits: &mut Vec<SearchHit>) {
2860 let mut seen = BTreeSet::new();
2861 hits.retain(|hit| seen.insert(hit.chunk_id));
2862}
2863
/// Collapses every run of whitespace in `text` to a single space, trims both ends and
/// keeps at most the first 240 characters.
fn bounded_summary(text: &str) -> String {
    let mut collapsed = String::new();
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.chars().take(240).collect()
}
2867
/// Grouping key for logical symbols: symbol rows that agree on all five fields are
/// treated as variants of one logical symbol.
///
/// The derived ordering compares the fields in declaration order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct LogicalSymbolKey {
    /// Language column of the symbol.
    language: String,
    /// Path of the file that declares the symbol.
    path: String,
    /// Unqualified symbol name.
    name: String,
    /// Qualified symbol name.
    qualified_name: String,
    /// Symbol kind as stored in the index.
    kind: String,
}
2876
2877impl LogicalSymbolKey {
2878 fn from(row: &LogicalSymbolMemberRow) -> Self {
2879 Self {
2880 language: row.language.clone(),
2881 path: row.path.clone(),
2882 name: row.name.clone(),
2883 qualified_name: row.qualified_name.clone(),
2884 kind: row.kind.clone(),
2885 }
2886 }
2887}
2888
/// One symbol row prepared for insertion as a member of a logical symbol.
#[derive(Debug, Clone)]
struct LogicalSymbolMemberRow {
    /// Id of the underlying `symbols` row.
    symbol_id: i64,
    /// Path of the file that declares the symbol.
    path: String,
    /// Language column of the symbol.
    language: String,
    /// Unqualified symbol name.
    name: String,
    /// Qualified symbol name.
    qualified_name: String,
    /// Symbol kind as stored in the index.
    kind: String,
    /// Signature text, when the symbol has one.
    signature: Option<String>,
    /// Line on which the symbol starts.
    start_line: i64,
    /// Line on which the symbol ends.
    end_line: i64,
}
2901
/// Converts an absolute byte offset into a line number using the text of a chunk.
///
/// `text` is the chunk's text, which starts at absolute offset `chunk_start_byte` on
/// line `chunk_start_line`. Offsets at or before the chunk start map to the chunk's
/// start line (at least `1`). Later offsets map to the start line plus the number of
/// newlines between the chunk start and the offset; offsets past the end of `text` are
/// clamped to its end.
///
/// Newlines are counted over raw bytes, so an offset that falls inside a multi-byte
/// character is handled like any other instead of panicking on a string slice.
fn symbol_line_for_byte(
    text: &str,
    chunk_start_byte: usize,
    chunk_start_line: i64,
    byte: usize,
) -> i64 {
    if byte <= chunk_start_byte {
        return chunk_start_line.max(1);
    }
    let local = (byte - chunk_start_byte).min(text.len());
    let newlines = text.as_bytes()[..local].iter().filter(|&&b| b == b'\n').count();
    chunk_start_line + i64::try_from(newlines).unwrap_or(0)
}
2915
2916fn graph_only_reason(edge: &crate::query::graph::GraphHop, current_line: Option<&str>) -> String {
2917 let Some(line) = current_line else {
2918 return "missing_current_source_line".to_string();
2919 };
2920 if edge
2921 .target_qualified_name
2922 .as_deref()
2923 .is_some_and(|qualified| !qualified.is_empty() && line.contains(qualified))
2924 {
2925 return "qualified_call_pattern_mismatch".to_string();
2926 }
2927 if edge.target.as_deref().is_some_and(|target| !target.is_empty() && line.contains(target)) {
2928 return "imported_or_unqualified_call".to_string();
2929 }
2930 if edge
2931 .evidence
2932 .as_deref()
2933 .is_some_and(|evidence| !evidence.is_empty() && line.contains(evidence.trim()))
2934 {
2935 return "regex_too_narrow".to_string();
2936 }
2937 "stale_or_overbroad_graph_edge".to_string()
2938}
2939
2940fn is_likely_false_positive_graph_only(
2941 edge: &crate::query::graph::GraphHop,
2942 graph_only: &crate::query::graph::GraphOnlyEdge,
2943) -> bool {
2944 if graph_only.likely_reason == "stale_or_overbroad_graph_edge" {
2945 return true;
2946 }
2947 edge.resolution == "target_name_fallback"
2948 || edge.confidence == "NameOnly"
2949 || edge.confidence == "Ambiguous"
2950 || !edge.verified_target_symbol
2951}
2952
2953fn classify_text_only_hit(
2954 path: &str,
2955 text: &str,
2956 parser_failure_paths: &BTreeSet<String>,
2957) -> &'static str {
2958 if parser_failure_paths.contains(path) {
2959 return "parser_failure";
2960 }
2961 if is_generated_path(path) {
2962 return "generated_text_mention";
2963 }
2964 let trimmed = text.trim_start();
2965 if is_comment_like_text(trimmed) {
2966 return "comment_text_mention";
2967 }
2968 if is_import_or_declaration_text(trimmed) {
2969 return "declaration_text_mention";
2970 }
2971 if is_test_like_path(path) && is_test_scaffolding_text(trimmed) {
2972 return "test_scaffolding_text_mention";
2973 }
2974 "parser_call_extraction"
2975}
2976
/// Tells whether a text-only hit classification points at a gap in parsing, i.e. is
/// `parser_call_extraction` or `parser_failure`.
fn is_likely_parser_gap_kind(kind: &str) -> bool {
    kind == "parser_call_extraction" || kind == "parser_failure"
}
2980
/// Tells whether `path` looks like generated code.
///
/// A path qualifies when one of its directory components is `generated` or
/// `generated-web`, or when it ends in `.d.ts` (which also covers `_bg.wasm.d.ts`).
/// Directory components are matched at any depth, including the first one, so relative
/// paths such as `generated/api.ts` are recognised too.
fn is_generated_path(path: &str) -> bool {
    const GENERATED_DIRS: [&str; 2] = ["generated", "generated-web"];
    // Everything before the last '/' is the directory part; the final component is a file name.
    let in_generated_dir = path
        .rsplit_once('/')
        .is_some_and(|(dirs, _file)| dirs.split('/').any(|dir| GENERATED_DIRS.contains(&dir)));
    in_generated_dir || path.ends_with(".d.ts")
}
2987
/// Tells whether a line starts with a comment marker: `//`, `/*`, `*`, `*/` or `#`.
///
/// The text is checked as given; callers strip leading whitespace first.
fn is_comment_like_text(text: &str) -> bool {
    const COMMENT_PREFIXES: [&str; 5] = ["//", "/*", "*", "*/", "#"];
    COMMENT_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
2995
/// Tells whether a line starts with an import or type-declaration keyword: `import `,
/// `export type `, `export interface `, `type `, `interface ` or `declare `.
fn is_import_or_declaration_text(text: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 6] =
        ["import ", "export type ", "export interface ", "type ", "interface ", "declare "];
    DECLARATION_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
3004
/// Tells whether a line contains a typical test-framework fragment: `.mock`, `jest.`,
/// `jest<`, `expect(`, `toHaveBeen`, `describe(`, `it(` or `test(`.
///
/// These are plain substring checks, so identifiers that merely end in `it(` or
/// `test(` match as well.
fn is_test_scaffolding_text(text: &str) -> bool {
    const SCAFFOLDING_MARKERS: [&str; 8] =
        [".mock", "jest.", "jest<", "expect(", "toHaveBeen", "describe(", "it(", "test("];
    SCAFFOLDING_MARKERS.iter().any(|marker| text.contains(*marker))
}
3015
3016fn recommended_graph_text_fallback(
3017 parser_gaps: &[crate::query::graph::TextOnlyHit],
3018 graph_only_edges: &[crate::query::graph::GraphOnlyEdge],
3019) -> String {
3020 match (parser_gaps.is_empty(), graph_only_edges.is_empty()) {
3021 (false, false) => "both",
3022 (false, true) => "text",
3023 (true, false) => "graph",
3024 (true, true) => "none",
3025 }
3026 .to_string()
3027}
3028
/// Describes how a comparison pattern relates to `symbol_name`.
///
/// - `regex` when `symbol_name` is empty
/// - `identifier_or_call` when the pattern contains `\b`, `\W` or `[^`, or the symbol
///   name followed by an escaped or plain opening parenthesis
/// - `substring_identifier` when the pattern merely contains the symbol name
/// - `regex` otherwise
fn compare_pattern_match_mode(pattern: &str, symbol_name: &str) -> String {
    if symbol_name.is_empty() {
        return "regex".to_string();
    }
    let boundary_markers = ["\\b", "\\W", "[^"];
    let call_forms = [format!("{symbol_name}\\("), format!("{symbol_name}(")];
    let anchored = boundary_markers.iter().any(|marker| pattern.contains(*marker))
        || call_forms.iter().any(|call| pattern.contains(call.as_str()));
    let mode = if anchored {
        "identifier_or_call"
    } else if pattern.contains(symbol_name) {
        "substring_identifier"
    } else {
        "regex"
    };
    mode.to_string()
}
3048
/// Tells whether `path` looks like test code, ignoring ASCII case.
///
/// A path qualifies when one of its directory components is `test`, `tests` or
/// `__tests__`, or when it ends in `_test.rs`, `.test.ts`, `.test.tsx`, `.spec.ts` or
/// `.spec.tsx`. Directory components are matched at any depth, including the first
/// one, so relative paths such as `tests/integration.rs` are recognised too.
fn is_test_like_path(path: &str) -> bool {
    const TEST_DIRS: [&str; 3] = ["test", "tests", "__tests__"];
    const TEST_SUFFIXES: [&str; 5] =
        ["_test.rs", ".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx"];
    let lower = path.to_ascii_lowercase();
    // Everything before the last '/' is the directory part; the final component is a file name.
    let in_test_dir = lower
        .rsplit_once('/')
        .is_some_and(|(dirs, _file)| dirs.split('/').any(|dir| TEST_DIRS.contains(&dir)));
    in_test_dir || TEST_SUFFIXES.iter().any(|suffix| lower.ends_with(*suffix))
}
3060
/// Path and content hash of one file as currently stored in the index.
#[derive(Debug)]
struct IndexedFile {
    /// Stored `path` column.
    path: String,
    /// Stored `sha256` column.
    sha256: String,
}
3066
/// One file selected for indexing, together with the target settings that matched it.
#[derive(Debug, Clone)]
struct IndexFile {
    /// Location of the file on disk (the configured root joined with `relative_path`).
    full_path: PathBuf,
    /// Path of the file relative to the configured root.
    relative_path: PathBuf,
    /// Language of the target that claimed the file.
    language: Language,
    /// Kind of the target that claimed the file.
    kind: TargetKind,
    /// Commit identifier for the file. The collectors in this file leave it empty.
    /// NOTE(review): presumably filled in later with the active scope — confirm.
    commit_sha: String,
    /// Worktree identifier for the file. The collectors in this file leave it empty.
    /// NOTE(review): presumably filled in later with the active scope — confirm.
    worktree_id: String,
}
3076
/// Identifies a scope by commit or by worktree; the constructors fill exactly
/// one of the two fields and leave the other empty.
#[derive(Debug, Clone)]
struct FileScope {
    /// Commit identifier; empty for a worktree scope.
    commit_sha: String,
    /// Worktree identifier; empty for a commit scope.
    worktree_id: String,
}

impl FileScope {
    /// Builds a scope for `commit_sha` with an empty worktree id.
    fn commit(commit_sha: String) -> Self {
        let worktree_id = String::new();
        FileScope { commit_sha, worktree_id }
    }

    /// Builds a scope for `worktree_id` with an empty commit sha.
    fn worktree(worktree_id: String) -> Self {
        let commit_sha = String::new();
        FileScope { commit_sha, worktree_id }
    }
}
3092
/// Result of preparing one file for indexing, kept together with the file it belongs to.
#[derive(Debug)]
struct PreparedIndexFile {
    /// The file that was prepared (a clone of the input entry).
    file: IndexFile,
    /// Prepared content, or the error raised while reading or preparing the file.
    prepared: anyhow::Result<PreparedIndexContent>,
}
3098
/// Everything derived from a file's text before it is written to the index.
#[derive(Debug)]
struct PreparedIndexContent {
    /// File modification time in milliseconds since the Unix epoch.
    modified_at_ms: i64,
    /// Full file contents, read as UTF-8 text.
    text: String,
    /// Lowercase hex SHA-256 digest of `text`'s bytes.
    sha256: String,
    /// Chunks produced by the chunker for this file.
    chunks: Vec<Chunk>,
    /// Symbols extracted from the file; empty for generated files and for
    /// files larger than `chunker::MAX_STRUCTURAL_PARSE_BYTES`.
    symbols: Vec<Symbol>,
    /// Parse problem reported for the file, if any. Always `None` for Markdown,
    /// generated files, and files above the structural parse size limit.
    parser_failure: Option<String>,
}
3108
/// Difference between the files found on disk and the files recorded in the index.
#[derive(Debug)]
struct DiscoveryPlan {
    /// Files that need indexing: every unindexed file plus every file whose
    /// current hash differs from the recorded one.
    files: Vec<IndexFile>,
    /// Recorded paths that were not found on disk during discovery.
    deleted: BTreeSet<PathBuf>,
    /// Discovered files that have no entry in the index.
    unindexed: Vec<IndexFile>,
    /// Relative paths of discovered files whose hash differs from the recorded hash.
    changed: Vec<PathBuf>,
    /// Number of files found by discovery.
    discovered_files: usize,
    /// Discovered paths plus deleted paths minus unindexed files, using saturating arithmetic.
    indexed_files: usize,
}
3118
/// Paths reported by git status, split by whether they currently exist on disk.
///
/// A path is kept in at most one of the two sets.
#[derive(Debug, Default)]
struct GitChangedPaths {
    /// Reported paths (relative to the configured root) that exist on disk.
    changed: BTreeSet<PathBuf>,
    /// Reported paths (relative to the configured root) that no longer exist on disk.
    deleted: BTreeSet<PathBuf>,
}
3124
3125fn collect_index_files(config: &Config) -> anyhow::Result<Vec<IndexFile>> {
3126 let mut targets = config.targets.iter().collect::<Vec<_>>();
3127 targets.sort_by_key(|target| match target.kind {
3128 TargetKind::Generated => 0,
3129 TargetKind::Tests => 1,
3130 TargetKind::Docs => 2,
3131 TargetKind::Source => 3,
3132 });
3133 let mut seen = BTreeSet::new();
3134 let mut files = Vec::new();
3135
3136 for target in targets {
3137 for file in walker::walk_target(&config.root, target)? {
3138 let relative_path = file.strip_prefix(&config.root)?.to_path_buf();
3139 if !seen.insert(relative_path.clone()) {
3140 continue;
3141 }
3142 files.push(IndexFile {
3143 full_path: file,
3144 relative_path,
3145 language: target.language,
3146 kind: target.kind,
3147 commit_sha: String::new(),
3148 worktree_id: String::new(),
3149 });
3150 }
3151 }
3152
3153 Ok(files)
3154}
3155
3156fn collect_changed_index_files(
3157 config: &Config,
3158 changes: &GitChangedPaths,
3159) -> anyhow::Result<Vec<IndexFile>> {
3160 let mut files = Vec::new();
3161 for relative_path in &changes.changed {
3162 let full_path = config.root.join(relative_path);
3163 if !full_path.is_file() {
3164 continue;
3165 }
3166 let Some((language, kind)) = target_for_path(config, relative_path) else {
3167 continue;
3168 };
3169 files.push(IndexFile {
3170 full_path,
3171 relative_path: relative_path.clone(),
3172 language,
3173 kind,
3174 commit_sha: String::new(),
3175 worktree_id: String::new(),
3176 });
3177 }
3178 Ok(files)
3179}
3180
3181fn spawn_git_history_prepare(
3182 root: &Path,
3183) -> JoinHandle<anyhow::Result<git_history::PreparedGitHistory>> {
3184 let root = root.to_path_buf();
3185 thread::spawn(move || git_history::prepare(&root))
3186}
3187
3188fn join_git_history_prepare(
3189 handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
3190) -> anyhow::Result<git_history::PreparedGitHistory> {
3191 handle.join().map_err(|_| anyhow::anyhow!("git history preparation panicked"))?
3192}
3193
3194fn prepare_index_file(file: &IndexFile) -> PreparedIndexFile {
3195 PreparedIndexFile { file: file.clone(), prepared: prepare_index_content(file) }
3196}
3197
/// Prepares all `files` in parallel while forwarding progress events to `progress`.
///
/// The preparation runs on a scoped worker thread using a rayon parallel
/// iterator; the calling thread stays in a receive loop and invokes `progress`
/// itself, so the callback never runs on a worker thread.
///
/// Only a subset of completions is reported (see `should_report_file_progress`).
/// `current` in each event is a completion count, not the file's position in `files`.
///
/// # Errors
///
/// Returns an error if the worker thread panicked. Failures for individual
/// files are not errors here; they are carried inside each `PreparedIndexFile`.
fn prepare_files_with_progress<F>(
    files: &[IndexFile],
    progress: &mut F,
) -> anyhow::Result<Vec<PreparedIndexFile>>
where
    F: FnMut(IndexProgress),
{
    // Message sent from the workers to the reporting loop.
    #[derive(Debug)]
    struct PreparedProgress {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    }

    let total = files.len();
    let prepared = thread::scope(|scope| {
        let (tx, rx) = mpsc::channel();
        // Counts finished files across all rayon workers.
        let completed = AtomicUsize::new(0);
        // `move` transfers `tx` into the worker closure: once the worker
        // finishes, the sender is dropped and the receive loop below ends.
        let handle = scope.spawn(move || {
            files
                .par_iter()
                .map(|file| {
                    let prepared = prepare_index_file(file);
                    let current = completed.fetch_add(1, Ordering::Relaxed) + 1;
                    if should_report_file_progress(current, total) {
                        // A send error only means the receiver is gone; progress
                        // is best-effort, so it is ignored.
                        let _ = tx.send(PreparedProgress {
                            current,
                            total,
                            path: file.relative_path.clone(),
                            language: file.language,
                            kind: file.kind,
                        });
                    }
                    prepared
                })
                .collect::<Vec<_>>()
        });

        // Drain events on this thread until every sender has been dropped.
        for event in rx {
            progress(IndexProgress::PreparingFile {
                current: event.current,
                total: event.total,
                path: event.path,
                language: event.language,
                kind: event.kind,
            });
        }

        handle.join().map_err(|_| anyhow::anyhow!("parallel file preparation panicked"))
    })?;
    Ok(prepared)
}
3252
/// Decides whether the `current`-th completion out of `total` is worth reporting.
///
/// Reports the first item, the last item, and every item at which the
/// completed tenth (`current * 10 / total`, integer division) advances past
/// the previous item's. Never reports when `total` is zero.
fn should_report_file_progress(current: usize, total: usize) -> bool {
    if total == 0 {
        return false;
    }
    if current == 1 || current == total {
        return true;
    }
    let tenth_reached = |count: usize| count.saturating_mul(10) / total;
    tenth_reached(current) != tenth_reached(current.saturating_sub(1))
}
3262
3263fn prepare_index_content(file: &IndexFile) -> anyhow::Result<PreparedIndexContent> {
3264 let text = fs::read_to_string(&file.full_path)?;
3265 let modified_at_ms = file_metadata_ms(&file.full_path)?;
3266 let sha256 = hex_sha256(text.as_bytes());
3267 let parser_failure =
3268 if file.language != Language::Markdown && file.kind != TargetKind::Generated {
3269 if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3270 None
3271 } else {
3272 parser::parse_error(&file.relative_path, file.language, &text)
3273 .unwrap_or_else(|err| Some(err.to_string()))
3274 }
3275 } else {
3276 None
3277 };
3278 let chunks = if file.kind == TargetKind::Generated {
3279 chunker::generated_chunks_for_file(&file.relative_path, &text)
3280 } else {
3281 chunker::chunks_for_file(&file.relative_path, file.language, &text)
3282 };
3283 let symbols =
3284 if file.kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3285 Vec::new()
3286 } else {
3287 symbols::symbols_for_file(&file.relative_path, file.language, &text)
3288 };
3289 Ok(PreparedIndexContent { modified_at_ms, text, sha256, chunks, symbols, parser_failure })
3290}
3291
3292fn discovery_plan(conn: &rusqlite::Connection, config: &Config) -> anyhow::Result<DiscoveryPlan> {
3293 let discovered = collect_index_files(config)?;
3294 let mut indexed = indexed_file_map(conn)?;
3295 let mut current_paths = BTreeSet::new();
3296 let mut files = Vec::new();
3297 let mut unindexed = Vec::new();
3298 let mut changed = Vec::new();
3299 let discovered_files = discovered.len();
3300 let hashed = discovered
3301 .par_iter()
3302 .map(|file| -> anyhow::Result<(IndexFile, String)> {
3303 let text = fs::read(&file.full_path)?;
3304 Ok((file.clone(), hex_sha256(&text)))
3305 })
3306 .collect::<Vec<_>>();
3307
3308 for hashed_file in hashed {
3309 let (file, current_hash) = hashed_file?;
3310 let relative = path_string(&file.relative_path);
3311 current_paths.insert(file.relative_path.clone());
3312 let Some(indexed_hash) = indexed.remove(&relative) else {
3313 unindexed.push(file.clone());
3314 files.push(file);
3315 continue;
3316 };
3317 if current_hash != indexed_hash {
3318 changed.push(file.relative_path.clone());
3319 files.push(file);
3320 }
3321 }
3322
3323 let deleted = indexed
3324 .into_keys()
3325 .map(PathBuf::from)
3326 .filter(|path| !current_paths.contains(path))
3327 .collect::<BTreeSet<_>>();
3328
3329 Ok(DiscoveryPlan {
3330 discovered_files,
3331 indexed_files: current_paths
3332 .len()
3333 .saturating_add(deleted.len())
3334 .saturating_sub(unindexed.len()),
3335 files,
3336 deleted,
3337 unindexed,
3338 changed,
3339 })
3340}
3341
3342fn indexed_file_map(conn: &rusqlite::Connection) -> anyhow::Result<BTreeMap<String, String>> {
3343 let mut stmt = conn.prepare("SELECT path, sha256 FROM files ORDER BY path")?;
3344 let rows =
3345 stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?;
3346 let mut files = BTreeMap::new();
3347 for row in rows {
3348 let (path, sha256) = row?;
3349 files.insert(path, sha256);
3350 }
3351 Ok(files)
3352}
3353
3354fn target_for_path(config: &Config, relative_path: &Path) -> Option<(Language, TargetKind)> {
3355 let relative = path_string(relative_path);
3356 let language = Language::from_path(relative_path)?;
3357 let mut targets = config.targets.iter().collect::<Vec<_>>();
3358 targets.sort_by_key(|target| match target.kind {
3359 TargetKind::Generated => 0,
3360 TargetKind::Tests => 1,
3361 TargetKind::Docs => 2,
3362 TargetKind::Source => 3,
3363 });
3364 targets.into_iter().find_map(|target| {
3365 if target.language != language {
3366 return None;
3367 }
3368 if !target.directories.iter().any(|directory| {
3369 directory.as_os_str().is_empty()
3370 || directory == Path::new(".")
3371 || relative_path.starts_with(directory)
3372 }) {
3373 return None;
3374 }
3375 if target.exclude.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3376 return None;
3377 }
3378 if !target.include.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3379 return None;
3380 }
3381 Some((target.language, target.kind))
3382 })
3383}
3384
/// Collects the paths git status reports under `root`, split into existing and missing files.
///
/// Paths are returned relative to `root`; status entries that do not lie under
/// `root` are skipped. Untracked files are included and rename tracking is
/// disabled for the tree/index comparison.
///
/// # Errors
///
/// Fails if no repository is found from `root`, the repository has no
/// worktree, or the status iteration reports an error.
fn git_changed_paths(root: &Path) -> anyhow::Result<GitChangedPaths> {
    let repo = gix::discover(root)?;
    let worktree_root = repo
        .workdir()
        .ok_or_else(|| anyhow::anyhow!("git repository has no worktree"))?
        .to_path_buf();
    // Restrict the status walk to the configured root inside the worktree.
    let pathspec = config_root_pathspec(&worktree_root, root);
    let mut paths = GitChangedPaths::default();

    for item in repo
        .status(gix::progress::Discard)?
        .untracked_files(UntrackedFiles::Files)
        .tree_index_track_renames(tree_index::TrackRenames::Disabled)
        .into_iter([pathspec])?
    {
        let item = item?;
        let Some(path) = repo_relative_path_to_config_path(&worktree_root, root, item.location())
        else {
            continue;
        };
        // Classify by what is on disk right now; a path is kept in only one set.
        if root.join(&path).exists() {
            if !paths.deleted.contains(&path) {
                paths.changed.insert(path);
            }
        } else {
            paths.changed.remove(&path);
            paths.deleted.insert(path);
        }
    }

    Ok(paths)
}
3417
3418fn repo_relative_path_to_config_path(
3419 worktree_root: &Path,
3420 config_root: &Path,
3421 repo_relative_path: &gix::bstr::BStr,
3422) -> Option<PathBuf> {
3423 let path = PathBuf::from(repo_relative_path.to_str_lossy().as_ref());
3424 worktree_root.join(path).strip_prefix(config_root).ok().map(Path::to_path_buf)
3425}
3426
3427fn config_root_pathspec(worktree_root: &Path, config_root: &Path) -> BString {
3428 let relative = config_root.strip_prefix(worktree_root).unwrap_or_else(|_| Path::new(""));
3429 let relative = path_string(relative);
3430 if relative.is_empty() || relative == "." {
3431 BString::from("*")
3432 } else {
3433 BString::from(format!("{relative}/**"))
3434 }
3435}
3436
/// Matches `path` against a small subset of glob syntax.
///
/// Three pattern shapes are recognised, checked in this order:
///
/// * `**/*.ext` — the path ends with `.ext`.
/// * `dir/**` — the path lies below `dir`. The prefix must end on a path
///   separator, so `vendor/**` matches `vendor/a.rs` but not `vendored/a.rs`.
/// * anything else — the path equals the pattern, or contains the pattern
///   with leading and trailing `*` removed.
///
/// `path` is expected to use `/` as its separator.
fn matches_simple_pattern(path: &str, pattern: &str) -> bool {
    if let Some(extension) = pattern.strip_prefix("**/*.") {
        return path.ends_with(&format!(".{extension}"));
    }
    if let Some(prefix) = pattern.strip_suffix("/**") {
        return match path.strip_prefix(prefix) {
            // An empty prefix, or one that already ends in `/`, carries its own
            // boundary; otherwise the remainder must start a new path component.
            Some(rest) => prefix.is_empty() || prefix.ends_with('/') || rest.starts_with('/'),
            None => false,
        };
    }
    path == pattern || path.contains(pattern.trim_matches('*'))
}
3446
3447fn meta_for(conn: &rusqlite::Connection, key: &str) -> anyhow::Result<Option<String>> {
3448 Ok(conn
3449 .query_row("SELECT value FROM index_meta WHERE key = ?1", [key], |row| row.get(0))
3450 .optional()?)
3451}
3452
/// Runs `git` with `args` in `root` and returns its trimmed standard output.
///
/// Returns `None` when the process cannot be started or exits unsuccessfully.
/// Output that is not valid UTF-8 is converted lossily.
fn git_output(root: &Path, args: &[&str]) -> Option<String> {
    let output = Command::new("git").current_dir(root).args(args).output().ok()?;
    output.status.success().then(|| {
        let stdout = String::from_utf8_lossy(&output.stdout);
        stdout.trim().to_string()
    })
}
3460
3461fn resolve_git_context(root: &Path) -> (String, String) {
3462 let commit_sha =
3463 git_output(root, &["rev-parse", "HEAD"]).map(|s| s.trim().to_string()).unwrap_or_default();
3464 let worktree_id = root.to_string_lossy().trim_end_matches('/').to_string();
3465 (commit_sha, worktree_id)
3466}
3467
3468fn file_metadata_ms(path: &Path) -> anyhow::Result<i64> {
3469 let modified = fs::metadata(path)?.modified()?;
3470 Ok(duration_ms(modified.duration_since(UNIX_EPOCH)?))
3471}
3472
3473fn now_ms() -> i64 {
3474 duration_ms(SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default())
3475}
3476
/// Converts `duration` to whole milliseconds, clamping to `i64::MAX` when it does not fit.
fn duration_ms(duration: std::time::Duration) -> i64 {
    match i64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => i64::MAX,
    }
}
3480
3481fn hex_sha256(bytes: &[u8]) -> String {
3482 let hash = Sha256::digest(bytes);
3483 let mut out = String::with_capacity(hash.len() * 2);
3484 for byte in hash {
3485 use std::fmt::Write as _;
3486 let _ = write!(out, "{byte:02x}");
3487 }
3488 out
3489}
3490
/// Renders `path` as text (lossily for non-UTF-8 input) with every `\` replaced by `/`.
fn path_string(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|character| if character == '\\' { '/' } else { character })
        .collect()
}
3494
3495#[cfg(test)]
3496mod schema_bootstrap_tests {
3497 use std::sync::atomic::{AtomicU64, Ordering};
3498
3499 use super::*;
3500 use crate::config::ResolvedTarget;
3501
3502 static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
3503
3504 #[test]
3505 fn rebuild_bootstraps_sqlite_schema_for_empty_target_root() {
3506 let root = unique_temp_root();
3507 let _ = fs::remove_dir_all(&root);
3508 let docs = root.join("docs");
3509 fs::create_dir_all(&docs).unwrap();
3510
3511 let config = Config {
3512 root: root.clone(),
3513 database: root.join(".rag-rat/index.sqlite"),
3514 targets: vec![ResolvedTarget {
3515 name: "markdown".to_string(),
3516 language: Language::Markdown,
3517 directories: vec![PathBuf::from("docs")],
3518 include: vec!["**/*.md".to_string()],
3519 exclude: Vec::new(),
3520 kind: TargetKind::Docs,
3521 }],
3522 local_ai: Default::default(),
3523 };
3524
3525 let db = IndexDatabase::rebuild(&config).unwrap();
3526 assert!(config.database.exists());
3527 assert_eq!(table_count(&db, "files"), 1);
3528 assert_eq!(table_count(&db, "chunks"), 1);
3529 assert_eq!(table_count(&db, "symbols"), 1);
3530 assert_eq!(table_count(&db, "parser_failures"), 1);
3531 assert_eq!(table_count(&db, "index_meta"), 1);
3532 assert_eq!(table_count(&db, "chunk_fts"), 1);
3533 assert_eq!(table_count(&db, "git_commits"), 1);
3534 assert_eq!(table_count(&db, "git_file_changes"), 1);
3535 assert_eq!(table_count(&db, "git_chunk_blame"), 1);
3536 assert_eq!(table_count(&db, "commit_fts"), 1);
3537 assert_eq!(table_count(&db, "ai_models"), 1);
3538 assert_eq!(table_count(&db, "chunk_embeddings"), 1);
3539 assert_eq!(table_count(&db, "chunk_summaries"), 1);
3540 assert_eq!(table_count(&db, "reconcile_meta"), 1);
3541 assert_eq!(table_count(&db, "reconcile_attempts"), 1);
3542 assert!(file_columns(&db).contains(&"indexed_revision".to_string()));
3543 assert_eq!(indexed_revision_count(&db), 0);
3544 assert!(chunk_columns(&db).contains(&"anchor_version".to_string()));
3545 assert!(chunk_columns(&db).contains(&"normalized_hash".to_string()));
3546 assert!(chunk_columns(&db).contains(&"start_boundary_hash".to_string()));
3547 assert!(chunk_columns(&db).contains(&"end_boundary_hash".to_string()));
3548 assert!(chunk_columns(&db).contains(&"source_revision".to_string()));
3549 let embedding_columns = table_columns(&db, "chunk_embeddings");
3550 assert!(embedding_columns.contains(&"model_version".to_string()));
3551 assert!(embedding_columns.contains(&"input_hash".to_string()));
3552 assert!(embedding_columns.contains(&"embedding_text_version".to_string()));
3553 assert!(embedding_columns.contains(&"embedding_policy".to_string()));
3554 assert!(embedding_columns.contains(&"embedding_priority".to_string()));
3555 assert!(embedding_columns.contains(&"input_chars".to_string()));
3556 assert!(embedding_columns.contains(&"input_truncated".to_string()));
3557 assert!(embedding_columns.contains(&"attempt_count".to_string()));
3558 assert!(embedding_columns.contains(&"next_retry_after_ms".to_string()));
3559 assert!(embedding_columns.contains(&"computed_at_ms".to_string()));
3560 let edge_columns = table_columns(&db, "edges");
3561 assert!(edge_columns.contains(&"source_start_line".to_string()));
3562 assert!(edge_columns.contains(&"source_end_line".to_string()));
3563 assert!(edge_columns.contains(&"source_start_byte".to_string()));
3564 assert!(edge_columns.contains(&"source_end_byte".to_string()));
3565 assert!(edge_columns.contains(&"target_start_line".to_string()));
3566 assert!(edge_columns.contains(&"target_end_line".to_string()));
3567 assert!(edge_columns.contains(&"target_qualified_name".to_string()));
3568 assert!(edge_columns.contains(&"evidence".to_string()));
3569 assert!(edge_columns.contains(&"receiver_hint".to_string()));
3570 assert!(edge_columns.contains(&"resolution".to_string()));
3571 let logical_columns = table_columns(&db, "logical_symbols");
3572 assert!(logical_columns.contains(&"qualified_name".to_string()));
3573 assert!(logical_columns.contains(&"variant_count".to_string()));
3574 let member_columns = table_columns(&db, "logical_symbol_members");
3575 assert!(member_columns.contains(&"symbol_id".to_string()));
3576 assert!(member_columns.contains(&"signature_hash".to_string()));
3577 let github_ref_sync_columns = table_columns(&db, "github_ref_sync");
3578 assert!(github_ref_sync_columns.contains(&"status".to_string()));
3579 assert!(github_ref_sync_columns.contains(&"last_error".to_string()));
3580 let symbol_fact_columns = table_columns(&db, "symbol_facts");
3581 assert!(symbol_fact_columns.contains(&"fact_kind".to_string()));
3582 assert!(symbol_fact_columns.contains(&"fact_value".to_string()));
3583 assert_eq!(
3584 db.status(&config.database).unwrap().schema.current_version,
3585 schema::LATEST_SCHEMA_VERSION
3586 );
3587
3588 fs::remove_dir_all(root).unwrap();
3589 }
3590
3591 #[test]
3592 fn rebuild_reports_file_preparation_progress() {
3593 let root = unique_temp_root();
3594 let _ = fs::remove_dir_all(&root);
3595 fs::create_dir_all(root.join("src")).unwrap();
3596 fs::write(root.join("src/lib.rs"), "pub fn exported() {}\n").unwrap();
3597
3598 let config = source_config(root.clone(), Language::Rust);
3599 let mut events = Vec::new();
3600 IndexDatabase::rebuild_with_progress(&config, |progress| events.push(progress)).unwrap();
3601
3602 assert!(
3603 events.iter().any(|event| matches!(event, IndexProgress::PreparingFile { .. })),
3604 "missing preparing progress event: {events:?}"
3605 );
3606 assert!(
3607 events.iter().any(|event| matches!(event, IndexProgress::IndexingFile { .. })),
3608 "missing indexing progress event: {events:?}"
3609 );
3610
3611 fs::remove_dir_all(root).unwrap();
3612 }
3613
3614 #[test]
3615 fn file_progress_reports_first_final_and_decile_boundaries() {
3616 let reported = (1..=100)
3617 .filter(|current| should_report_file_progress(*current, 100))
3618 .collect::<Vec<_>>();
3619 assert_eq!(reported, vec![1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]);
3620 }
3621
3622 #[test]
3623 fn compatible_open_requires_recorded_schema_version() {
3624 let root = unique_temp_root();
3625 let _ = fs::remove_dir_all(&root);
3626 fs::create_dir_all(root.join(".rag-rat")).unwrap();
3627 let database = root.join(".rag-rat/index.sqlite");
3628 IndexDatabase::migrate(&database).unwrap();
3629 let conn = rusqlite::Connection::open(&database).unwrap();
3630 conn.execute_batch("DROP TABLE schema_version;").unwrap();
3631 drop(conn);
3632
3633 let status = IndexDatabase::migration_check(&database).unwrap();
3634 assert_eq!(status.state, schema::SchemaState::Older);
3635 let err = IndexDatabase::open(&database).unwrap_err().to_string();
3636 assert!(err.contains("run `rag-rat migrate`"), "{err}");
3637
3638 let migrated = IndexDatabase::migrate(&database).unwrap();
3639 assert_eq!(migrated.state, schema::SchemaState::Compatible);
3640 IndexDatabase::open(&database).unwrap();
3641
3642 fs::remove_dir_all(root).unwrap();
3643 }
3644
3645 #[test]
3646 fn migrate_adds_edge_name_columns_before_indexing_them() {
3647 let root = unique_temp_root();
3648 let _ = fs::remove_dir_all(&root);
3649 fs::create_dir_all(root.join(".rag-rat")).unwrap();
3650 let database = root.join(".rag-rat/index.sqlite");
3651 let conn = rusqlite::Connection::open(&database).unwrap();
3652 conn.execute_batch(
3653 "
3654 CREATE TABLE files(
3655 id INTEGER PRIMARY KEY AUTOINCREMENT,
3656 path TEXT NOT NULL UNIQUE,
3657 language TEXT NOT NULL,
3658 kind TEXT NOT NULL,
3659 sha256 TEXT NOT NULL,
3660 modified_at_ms INTEGER NOT NULL,
3661 generated INTEGER NOT NULL DEFAULT 0,
3662 indexed_at_ms INTEGER NOT NULL
3663 );
3664 CREATE TABLE chunks(
3665 id INTEGER PRIMARY KEY AUTOINCREMENT,
3666 file_id INTEGER NOT NULL,
3667 chunk_kind TEXT NOT NULL,
3668 symbol_path TEXT,
3669 start_byte INTEGER NOT NULL,
3670 end_byte INTEGER NOT NULL,
3671 start_line INTEGER NOT NULL,
3672 end_line INTEGER NOT NULL,
3673 text TEXT NOT NULL,
3674 text_hash TEXT NOT NULL
3675 );
3676 CREATE TABLE symbols(
3677 id INTEGER PRIMARY KEY AUTOINCREMENT,
3678 file_id INTEGER NOT NULL,
3679 language TEXT NOT NULL,
3680 name TEXT NOT NULL,
3681 qualified_name TEXT NOT NULL,
3682 kind TEXT NOT NULL,
3683 start_byte INTEGER NOT NULL,
3684 end_byte INTEGER NOT NULL,
3685 signature TEXT,
3686 docs TEXT
3687 );
3688 CREATE TABLE edges(
3689 id INTEGER PRIMARY KEY AUTOINCREMENT,
3690 from_symbol_id INTEGER,
3691 to_symbol_id INTEGER,
3692 edge_kind TEXT NOT NULL,
3693 confidence TEXT NOT NULL
3694 );
3695 ",
3696 )
3697 .unwrap();
3698 drop(conn);
3699
3700 let migrated = IndexDatabase::migrate(&database).unwrap();
3701 assert_eq!(migrated.state, schema::SchemaState::Compatible);
3702 let db = IndexDatabase::open(&database).unwrap();
3703 let columns = table_columns(&db, "edges");
3704 assert!(columns.contains(&"from_name".to_string()));
3705 assert!(columns.contains(&"to_name".to_string()));
3706 assert!(columns.contains(&"source_start_line".to_string()));
3707 assert!(columns.contains(&"source_end_line".to_string()));
3708 assert!(columns.contains(&"source_start_byte".to_string()));
3709 assert!(columns.contains(&"source_end_byte".to_string()));
3710 assert!(columns.contains(&"target_start_line".to_string()));
3711 assert!(columns.contains(&"target_end_line".to_string()));
3712 assert_eq!(table_count(&db, "idx_edges_from_name"), 1);
3713 assert_eq!(table_count(&db, "idx_edges_to_name"), 1);
3714
3715 fs::remove_dir_all(root).unwrap();
3716 }
3717
3718 #[test]
3719 fn migrate_preserves_github_papertrail_cache() {
3720 let (root, config) =
3721 markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
3722 let db = IndexDatabase::rebuild(&config).unwrap();
3723 github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
3724 .unwrap();
3725 assert_eq!(row_count(&db, "github_refs"), 1);
3726 assert_eq!(row_count(&db, "github_issues"), 1);
3727 assert_eq!(row_count(&db, "github_comments"), 1);
3728 assert_eq!(row_count(&db, "github_pull_requests"), 1);
3729 assert_eq!(row_count(&db, "github_reviews"), 1);
3730 assert_eq!(row_count(&db, "github_review_comments"), 1);
3731 assert_eq!(row_count(&db, "github_fts"), 5);
3732 db.storage
3733 .connection()
3734 .execute("DELETE FROM schema_version WHERE id = ?1", ["010_symbol_facts"])
3735 .unwrap();
3736 drop(db);
3737
3738 let migrated = IndexDatabase::migrate(&config.database).unwrap();
3739 assert_eq!(migrated.state, schema::SchemaState::Compatible);
3740 let db = IndexDatabase::open(&config.database).unwrap();
3741 assert_eq!(row_count(&db, "github_refs"), 1);
3742 assert_eq!(row_count(&db, "github_issues"), 1);
3743 assert_eq!(row_count(&db, "github_comments"), 1);
3744 assert_eq!(row_count(&db, "github_pull_requests"), 1);
3745 assert_eq!(row_count(&db, "github_reviews"), 1);
3746 assert_eq!(row_count(&db, "github_review_comments"), 1);
3747 assert_eq!(row_count(&db, "github_fts"), 5);
3748 let hits = db.github_issue_search("sqlite", 10).unwrap();
3749 assert_eq!(hits.len(), 1);
3750 assert_eq!(hits[0].number, 42);
3751
3752 fs::remove_dir_all(root).unwrap();
3753 }
3754
3755 #[test]
3756 fn full_rebuild_preserves_github_papertrail_cache() {
3757 let (root, config) =
3758 markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
3759 let db = IndexDatabase::rebuild(&config).unwrap();
3760 github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
3761 .unwrap();
3762 assert_eq!(row_count(&db, "github_issues"), 1);
3763 assert_eq!(row_count(&db, "github_fts"), 5);
3764 drop(db);
3765
3766 let db = IndexDatabase::rebuild(&config).unwrap();
3767
3768 assert_eq!(row_count(&db, "github_refs"), 1);
3769 assert_eq!(row_count(&db, "github_issues"), 1);
3770 assert_eq!(row_count(&db, "github_comments"), 1);
3771 assert_eq!(row_count(&db, "github_pull_requests"), 1);
3772 assert_eq!(row_count(&db, "github_reviews"), 1);
3773 assert_eq!(row_count(&db, "github_review_comments"), 1);
3774 assert_eq!(row_count(&db, "github_ref_sync"), 1);
3775 assert_eq!(row_count(&db, "github_fts"), 5);
3776 let hits = db.github_issue_search("sqlite", 10).unwrap();
3777 assert_eq!(hits.len(), 1);
3778 assert_eq!(hits[0].number, 42);
3779
3780 fs::remove_dir_all(root).unwrap();
3781 }
3782
3783 #[test]
3784 fn full_rebuild_preserves_installed_model_manifest() {
3785 let (root, config) = markdown_config("alpha token with enough detail for embeddings\n");
3786 let db = IndexDatabase::rebuild(&config).unwrap();
3787 db.install_model(ai::HASH_MODEL_ID).unwrap();
3788 let before = db.local_ai_status().unwrap();
3789 assert_eq!(before.embedding.model_id, ai::HASH_MODEL_ID);
3790 assert!(before.embedding.installed);
3791 drop(db);
3792
3793 let db = IndexDatabase::rebuild(&config).unwrap();
3794
3795 let after = db.local_ai_status().unwrap();
3796 assert_eq!(after.embedding.model_id, ai::HASH_MODEL_ID);
3797 assert!(after.embedding.installed);
3798 assert_eq!(after.embedding.state, "Ready");
3799
3800 fs::remove_dir_all(root).unwrap();
3801 }
3802
3803 #[test]
3804 fn full_rebuild_preserves_other_worktree_contexts() {
3805 let root = unique_temp_root();
3806 let _ = fs::remove_dir_all(&root);
3807 fs::create_dir_all(root.join("src")).unwrap();
3808 fs::write(root.join("src/lib.rs"), "pub fn current_context() {}\n").unwrap();
3809 let config = source_config(root.clone(), Language::Rust);
3810 let db = IndexDatabase::rebuild(&config).unwrap();
3811 let other_file_id = db
3812 .storage
3813 .connection()
3814 .query_row(
3815 "
3816 INSERT INTO main.files(
3817 path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms,
3818 indexed_revision, commit_sha, worktree_id
3819 )
3820 VALUES ('src/other.rs', 'rust', 'source', 'other-sha', 0, 0, 1, 'other-sha', '', 'other-worktree')
3821 RETURNING id
3822 ",
3823 [],
3824 |row| row.get::<_, i64>(0),
3825 )
3826 .unwrap();
3827 let other_chunk_id = db
3828 .storage
3829 .connection()
3830 .query_row(
3831 "
3832 INSERT INTO main.chunks(
3833 file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line,
3834 text, text_hash, source_revision, anchor_version, normalized_hash,
3835 start_boundary_hash, end_boundary_hash, start_context_hash, end_context_hash,
3836 context_radius, embedding_policy, embedding_priority
3837 )
3838 VALUES (?1, 'symbol', 'other_context', 0, 12, 1, 1, 'other context', 'other-text',
3839 'other-sha', 1, '', '', '', '', '', 2, 'Embed', 1)
3840 RETURNING id
3841 ",
3842 [other_file_id],
3843 |row| row.get::<_, i64>(0),
3844 )
3845 .unwrap();
3846 db.storage
3847 .connection()
3848 .execute(
3849 "
3850 INSERT INTO main.symbols(
3851 file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs
3852 )
3853 VALUES (?1, 'rust', 'other_context', 'other_context', 'function', 0, 12, NULL, NULL)
3854 ",
3855 [other_file_id],
3856 )
3857 .unwrap();
3858 db.storage
3859 .connection()
3860 .execute(
3861 "INSERT INTO main.chunk_fts(rowid, text) VALUES (?1, 'other context')",
3862 [other_chunk_id],
3863 )
3864 .unwrap();
3865 drop(db);
3866
3867 let db = IndexDatabase::rebuild(&config).unwrap();
3868
3869 assert_eq!(
3870 db.storage
3871 .connection()
3872 .query_row(
3873 "SELECT COUNT(*) FROM main.files WHERE worktree_id = 'other-worktree'",
3874 [],
3875 |row| row.get::<_, i64>(0)
3876 )
3877 .unwrap(),
3878 1
3879 );
3880 assert_eq!(
3881 db.storage
3882 .connection()
3883 .query_row(
3884 "SELECT COUNT(*) FROM main.chunks WHERE file_id = ?1",
3885 [other_file_id],
3886 |row| { row.get::<_, i64>(0) }
3887 )
3888 .unwrap(),
3889 1
3890 );
3891 assert_eq!(
3892 db.storage
3893 .connection()
3894 .query_row(
3895 "SELECT COUNT(*) FROM main.symbols WHERE file_id = ?1",
3896 [other_file_id],
3897 |row| { row.get::<_, i64>(0) }
3898 )
3899 .unwrap(),
3900 1
3901 );
3902 assert_eq!(
3903 db.storage
3904 .connection()
3905 .query_row(
3906 "SELECT COUNT(*) FROM main.chunk_fts WHERE rowid = ?1",
3907 [other_chunk_id],
3908 |row| { row.get::<_, i64>(0) }
3909 )
3910 .unwrap(),
3911 1
3912 );
3913
3914 fs::remove_dir_all(root).unwrap();
3915 }
3916
3917 #[test]
3918 fn compatible_open_refuses_dirty_and_newer_schema() {
3919 let root = unique_temp_root();
3920 let _ = fs::remove_dir_all(&root);
3921 fs::create_dir_all(root.join(".rag-rat")).unwrap();
3922 let database = root.join(".rag-rat/index.sqlite");
3923 let conn = rusqlite::Connection::open(&database).unwrap();
3924 conn.execute_batch(
3925 "
3926 CREATE TABLE schema_version(
3927 id TEXT PRIMARY KEY,
3928 applied_at_ms INTEGER NOT NULL,
3929 checksum TEXT NOT NULL,
3930 description TEXT NOT NULL
3931 );
3932 INSERT INTO schema_version(id, applied_at_ms, checksum, description)
3933 VALUES ('__dirty__', 1, '', 'partial migration in progress');
3934 ",
3935 )
3936 .unwrap();
3937 drop(conn);
3938
3939 let dirty = IndexDatabase::migration_check(&database).unwrap();
3940 assert_eq!(dirty.state, schema::SchemaState::Dirty);
3941 let err = IndexDatabase::open(&database).unwrap_err().to_string();
3942 assert!(err.contains("dirty or partial"), "{err}");
3943
3944 let conn = rusqlite::Connection::open(&database).unwrap();
3945 conn.execute_batch(
3946 "
3947 DELETE FROM schema_version;
3948 INSERT INTO schema_version(id, applied_at_ms, checksum, description)
3949 VALUES ('999_future_schema', 1, 'sha256:future', 'future schema');
3950 ",
3951 )
3952 .unwrap();
3953 drop(conn);
3954 let newer = IndexDatabase::migration_check(&database).unwrap();
3955 assert_eq!(newer.state, schema::SchemaState::Newer);
3956 let err = IndexDatabase::open(&database).unwrap_err().to_string();
3957 assert!(err.contains("newer rag-rat"), "{err}");
3958
3959 fs::remove_dir_all(root).unwrap();
3960 }
3961
3962 #[test]
3963 fn discover_mode_indexes_new_files_and_removes_deleted_files() {
3964 let root = unique_temp_root();
3965 let _ = fs::remove_dir_all(&root);
3966 fs::create_dir_all(root.join("src")).unwrap();
3967 fs::write(root.join("src/lib.rs"), "pub fn old_symbol() {}\n").unwrap();
3968 let config = source_config(root.clone(), Language::Rust);
3969 let db = IndexDatabase::rebuild(&config).unwrap();
3970 assert_eq!(db.discovery_status(&config).unwrap().unindexed_source_files, 0);
3971
3972 fs::write(root.join("src/new.rs"), "pub fn new_symbol() {}\n").unwrap();
3973 fs::remove_file(root.join("src/lib.rs")).unwrap();
3974 let drift = db.discovery_status(&config).unwrap();
3975 assert_eq!(drift.unindexed_source_files, 1);
3976 assert_eq!(drift.removed_indexed_files, 1);
3977 assert!(drift.warning.as_deref().unwrap().contains("rag-rat index --discover"));
3978
3979 let db = IndexDatabase::index_discover(&config).unwrap();
3980 let fresh = db.discovery_status(&config).unwrap();
3981 assert_eq!(fresh.unindexed_source_files, 0);
3982 assert_eq!(fresh.removed_indexed_files, 0);
3983 assert!(fresh.warning.is_none());
3984 assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
3985 assert!(db.symbols("old_symbol", Some(Language::Rust), 10).unwrap().is_empty());
3986
3987 let mut events = Vec::new();
3988 let db = IndexDatabase::index_discover_with_progress(&config, |progress| {
3989 events.push(progress);
3990 })
3991 .unwrap();
3992 assert!(matches!(events.last(), Some(IndexProgress::Finished { files: 0 })));
3993 assert!(
3994 !events.iter().any(|event| matches!(
3995 event,
3996 IndexProgress::PreparingFile { .. } | IndexProgress::IndexingFile { .. }
3997 )),
3998 "no-op discover should not prepare or index files: {events:?}"
3999 );
4000 assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4001
4002 fs::remove_dir_all(root).unwrap();
4003 }
4004
4005 #[cfg(unix)]
4006 #[test]
4007 fn indexing_skips_symlink_loops() {
4008 let root = unique_temp_root();
4009 let _ = fs::remove_dir_all(&root);
4010 fs::create_dir_all(root.join("src")).unwrap();
4011 fs::write(root.join("src/lib.rs"), "pub fn loop_safe_symbol() {}\n").unwrap();
4012 std::os::unix::fs::symlink(&root, root.join("src/loop")).unwrap();
4013
4014 let config = source_config(root.clone(), Language::Rust);
4015 let db = IndexDatabase::rebuild(&config).unwrap();
4016
4017 assert_eq!(db.symbols("loop_safe_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4018
4019 fs::remove_dir_all(root).unwrap();
4020 }
4021
4022 #[test]
4023 fn dirty_git_files_are_indexed_as_worktree_overlay() {
4024 let root = unique_temp_root();
4025 let _ = fs::remove_dir_all(&root);
4026 let docs = root.join("docs");
4027 fs::create_dir_all(&docs).unwrap();
4028 fs::write(docs.join("search.md"), "# Title\nbase token\n").unwrap();
4029 run_git(&root, &["init"]);
4030 run_git(&root, &["add", "."]);
4031 run_git(
4032 &root,
4033 &[
4034 "-c",
4035 "user.name=Rag Rat Test",
4036 "-c",
4037 "user.email=rag-rat@example.invalid",
4038 "commit",
4039 "-m",
4040 "initial",
4041 ],
4042 );
4043
4044 let config = markdown_config_for_root(root.clone());
4045 let db = IndexDatabase::rebuild(&config).unwrap();
4046 assert_eq!(db.search("base", 10, false).unwrap().len(), 1);
4047
4048 fs::write(docs.join("search.md"), "# Title\noverlay token\n").unwrap();
4049 let db = IndexDatabase::index_changed(&config).unwrap();
4050 let scopes = db
4051 .storage
4052 .connection()
4053 .prepare(
4054 "
4055 SELECT commit_sha != '', worktree_id != ''
4056 FROM main.files
4057 WHERE path = 'docs/search.md'
4058 ORDER BY commit_sha != '' DESC, worktree_id != '' DESC
4059 ",
4060 )
4061 .unwrap()
4062 .query_map([], |row| Ok((row.get::<_, bool>(0)?, row.get::<_, bool>(1)?)))
4063 .unwrap()
4064 .collect::<Result<Vec<_>, _>>()
4065 .unwrap();
4066
4067 assert_eq!(scopes, vec![(true, false), (false, true)]);
4068 assert!(db.search("base", 10, false).unwrap().is_empty());
4069 let overlay_hits = db.search("overlay", 10, false).unwrap();
4070 assert_eq!(overlay_hits.len(), 1);
4071 assert!(overlay_hits[0].summary.contains("overlay token"));
4072
4073 fs::remove_dir_all(root).unwrap();
4074 }
4075
4076 #[test]
4077 fn rebuild_populates_revision_metadata_and_fresh_fts_state() {
4078 let (root, config) = markdown_config("alpha token");
4079 let db = IndexDatabase::rebuild(&config).unwrap();
4080 let status = db.status(&config.database).unwrap();
4081
4082 assert!(!status.content_revision.is_empty());
4083 assert_eq!(status.fts_source_revision.as_deref(), Some(status.content_revision.as_str()));
4084 assert_eq!(
4085 db.meta("content_revision").unwrap().as_deref(),
4086 Some(status.content_revision.as_str())
4087 );
4088 assert!(!status.fts_dirty);
4089 assert!(status.fts_fresh);
4090 assert!(!status.git_history.available);
4091 assert_eq!(status.git_history.commit_count, 0);
4092 assert_eq!(status.local_ai.embedding.state, "MissingModel");
4093 assert_eq!(status.local_ai.fastembed.backend, "fastembed");
4094 assert_eq!(status.local_ai.fastembed.model, ai::FASTEMBED_DISPLAY_MODEL);
4095 assert_eq!(status.local_ai.fastembed.dim, ai::FASTEMBED_EMBEDDING_DIM);
4096 assert!(!status.local_ai.fastembed.cache.is_empty());
4097 assert_eq!(status.local_ai.fastembed.build_feature_enabled, cfg!(feature = "fastembed"));
4098 assert_eq!(status.local_ai.artifacts.total_chunks, 1);
4099 assert_eq!(
4100 status.local_ai.artifacts.eligible_chunks + status.local_ai.artifacts.skipped_chunks,
4101 status.local_ai.artifacts.total_chunks
4102 );
4103 assert_eq!(
4104 status.local_ai.fastembed.eligible_embeddings
4105 + status.local_ai.fastembed.skipped_embeddings,
4106 status.local_ai.artifacts.total_chunks
4107 );
4108 assert_eq!(indexed_revision_count(&db), 1);
4109 assert_eq!(chunk_source_revision_count(&db), 1);
4110
4111 fs::remove_dir_all(root).unwrap();
4112 }
4113
4114 #[cfg(not(feature = "fastembed"))]
4115 #[test]
4116 fn fastembed_missing_feature_reports_rebuild_command() {
4117 let (root, config) = markdown_config("alpha token\n");
4118 let db = IndexDatabase::rebuild(&config).unwrap();
4119
4120 let err = db.install_model(ai::FASTEMBED_MODEL_ID).unwrap_err();
4121 assert!(err.to_string().contains(ai::FASTEMBED_MISSING_FEATURE_MESSAGE));
4122
4123 let status = db.local_ai_status().unwrap();
4124 assert!(!status.fastembed.build_feature_enabled);
4125 assert_eq!(status.fastembed.status, "MissingRuntime");
4126 assert_eq!(
4127 status.fastembed.message.as_deref(),
4128 Some(ai::FASTEMBED_MISSING_FEATURE_MESSAGE)
4129 );
4130 assert_eq!(status.fastembed.next.as_deref(), Some("cargo install rag-rat"));
4131
4132 fs::remove_dir_all(root).unwrap();
4133 }
4134
4135 #[test]
4136 fn reconcile_requires_explicit_model_install_and_ignores_stale_artifacts() {
4137 let (root, config) = markdown_config(
4138 "alpha token\nsecond line with enough detail for the semantic embedding policy to keep this chunk\nthird line with runtime context\n",
4139 );
4140 let db = IndexDatabase::rebuild(&config).unwrap();
4141 let chunk_id = first_chunk_id(&db);
4142
4143 let models = db.list_models().unwrap();
4144 let embedding = models.iter().find(|model| model.model_id == ai::HASH_MODEL_ID).unwrap();
4145 assert!(!embedding.installed);
4146 assert_eq!(embedding.status, "MissingModel");
4147
4148 let hits = db.search("alpha", 10, false).unwrap();
4149 assert_eq!(hits.len(), 1);
4150 assert!(hits[0].summary.contains("alpha token"));
4151
4152 let blocked = db.reconcile(Some(1), Some(8)).unwrap();
4153 assert_eq!(blocked.processed_chunks, 0);
4154 assert_eq!(blocked.embeddings_written, 0);
4155 assert_eq!(blocked.blocked_chunks, 0);
4156 assert_eq!(blocked.model_id, ai::HASH_MODEL_ID);
4157 assert_eq!(blocked.batch_size, 8);
4158 assert_eq!(blocked.status, "Blocked");
4159
4160 let status = db.local_ai_status().unwrap();
4161 assert_eq!(status.embedding.state, "MissingModel");
4162 assert_eq!(status.embedding.blocked_artifacts, 0);
4163
4164 db.install_model(ai::HASH_MODEL_ID).unwrap();
4165 let plan = db.reconcile_plan().unwrap();
4166 assert_eq!(plan.embeddings.missing, 1);
4167 assert_eq!(plan.embeddings.current, 0);
4168 let current = db.reconcile(Some(1), Some(8)).unwrap();
4169 assert_eq!(current.embeddings_written, 1);
4170 assert_eq!(current.model_id, ai::HASH_MODEL_ID);
4171 assert_eq!(current.model_version, "hash-v1");
4172 assert_eq!(current.embedding_dim, ai::HASH_EMBEDDING_DIM);
4173 assert_eq!(current.status, "Current");
4174 assert_eq!(current.work_reasons.get("Missing"), Some(&1));
4175 let noop = db.reconcile(None, Some(8)).unwrap();
4176 assert_eq!(noop.processed_chunks, 0);
4177 assert_eq!(noop.embeddings_written, 0);
4178 let status = db.local_ai_status().unwrap();
4179 assert_eq!(status.embedding.state, "Ready");
4180 assert_eq!(status.embedding.current_artifacts, 1);
4181 let embedding_bytes: i64 = db
4182 .storage
4183 .connection()
4184 .query_row(
4185 "SELECT length(vector_blob) FROM chunk_embeddings WHERE chunk_id = ?1 AND status = 'Current'",
4186 [chunk_id],
4187 |row| row.get(0),
4188 )
4189 .unwrap();
4190 assert_eq!(embedding_bytes, (ai::HASH_EMBEDDING_DIM * 4) as i64);
4191
4192 let hits = db.search("alpha", 10, false).unwrap();
4193 assert!(hits[0].summary.contains("alpha token"));
4194
4195 db.storage.connection().execute("DELETE FROM chunk_fts", []).unwrap();
4196 let vector_hits = db.search("alpha", 10, false).unwrap();
4197 assert_eq!(vector_hits.len(), 1);
4198 assert_eq!(vector_hits[0].chunk_id, chunk_id);
4199
4200 db.storage
4201 .connection()
4202 .execute(
4203 "UPDATE chunk_embeddings SET source_text_hash = 'old-hash' WHERE chunk_id = ?1",
4204 [chunk_id],
4205 )
4206 .unwrap();
4207 let plan = db.reconcile_plan().unwrap();
4208 assert_eq!(plan.embeddings.current, 0);
4209 assert_eq!(plan.embeddings.stale, 1);
4210 let refreshed = db.reconcile(None, Some(8)).unwrap();
4211 assert_eq!(refreshed.processed_chunks, 1);
4212 assert_eq!(refreshed.work_reasons.get("SourceChanged"), Some(&1));
4213 assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 1);
4214 let stale_embedding_hits = db.search("alpha", 10, false).unwrap();
4215 assert_eq!(stale_embedding_hits.len(), 1);
4216
4217 fs::remove_dir_all(root).unwrap();
4218 }
4219
4220 #[cfg(feature = "fastembed")]
4221 #[test]
4222 fn cached_fastembed_model_recovers_ready_state() {
4223 let (root, config) = markdown_config("alpha token\n");
4224 let db = IndexDatabase::rebuild(&config).unwrap();
4225 let cache_dir = root.join("models");
4226 let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4227 let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4228 fs::create_dir_all(repo.join("refs")).unwrap();
4229 fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4230 fs::write(repo.join("refs").join("main"), revision).unwrap();
4231
4232 ai::recover_cached_fastembed_model_at(db.storage.connection(), &cache_dir).unwrap();
4233
4234 let models = db.list_models().unwrap();
4235 let fastembed =
4236 models.iter().find(|model| model.model_id == ai::FASTEMBED_MODEL_ID).unwrap();
4237 assert!(fastembed.installed);
4238 assert_eq!(fastembed.status, "Ready");
4239 let status = db.local_ai_status().unwrap();
4240 assert_eq!(status.fastembed.status, "Ready");
4241 assert!(status.fastembed.active);
4242
4243 fs::remove_dir_all(root).unwrap();
4244 }
4245
4246 #[cfg(feature = "fastembed")]
4247 #[test]
4248 fn compatible_migrate_recovers_cached_fastembed_model() {
4249 let (root, config) = markdown_config("alpha token\n");
4250 let db = IndexDatabase::rebuild(&config).unwrap();
4251 let cache_dir = root.join("models");
4252 let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4253 let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4254 fs::create_dir_all(repo.join("refs")).unwrap();
4255 fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4256 fs::write(repo.join("refs").join("main"), revision).unwrap();
4257 db.storage
4258 .connection()
4259 .execute(
4260 "UPDATE ai_models
4261 SET installed = 0, status = 'MissingModel', installed_at_ms = NULL
4262 WHERE model_id = ?1",
4263 [ai::FASTEMBED_MODEL_ID],
4264 )
4265 .unwrap();
4266
4267 IndexDatabase::migrate_with_fastembed_cache(&config.database, Some(&cache_dir)).unwrap();
4268
4269 let db = IndexDatabase::open(&config.database).unwrap();
4270 let status = db.local_ai_status().unwrap();
4271 assert_eq!(status.fastembed.status, "Ready");
4272 assert!(status.fastembed.active);
4273
4274 fs::remove_dir_all(root).unwrap();
4275 }
4276
4277 #[test]
4278 fn reconcile_without_limit_processes_all_chunks() {
4279 let (root, config) = markdown_config(
4280 "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4281 );
4282 let db = IndexDatabase::rebuild(&config).unwrap();
4283 db.install_model(ai::HASH_MODEL_ID).unwrap();
4284
4285 let report = db.reconcile(None, Some(2)).unwrap();
4286
4287 assert_eq!(report.processed_chunks, 2);
4288 assert_eq!(report.embeddings_written, 2);
4289 assert_eq!(report.batch_size, 2);
4290 assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 2);
4291 let second = db.reconcile(None, Some(2)).unwrap();
4292 assert_eq!(second.processed_chunks, 0);
4293
4294 fs::remove_dir_all(root).unwrap();
4295 }
4296
4297 #[test]
4298 fn reconcile_treats_c_chunks_as_embedding_eligible() {
4299 let root = unique_temp_root();
4300 let _ = fs::remove_dir_all(&root);
4301 fs::create_dir_all(root.join("src")).unwrap();
4302 fs::write(
4303 root.join("src/main.c"),
4304 r#"
4305static int read_sensor_value(int baseline)
4306{
4307 int adjusted = baseline + 42;
4308 return adjusted;
4309}
4310
4311int main(void)
4312{
4313 int sample = read_sensor_value(7);
4314 return sample == 49 ? 0 : 1;
4315}
4316"#,
4317 )
4318 .unwrap();
4319 let config = source_config(root.clone(), Language::C);
4320 let db = IndexDatabase::rebuild(&config).unwrap();
4321 db.install_model(ai::HASH_MODEL_ID).unwrap();
4322
4323 let plan = db.reconcile_plan().unwrap();
4324
4325 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipLanguageUnsupported"), None);
4326 assert!(plan.embeddings.missing > 0, "plan: {:?}", plan.embeddings);
4327
4328 let report = db.reconcile(None, Some(8)).unwrap();
4329 assert!(report.embeddings_written > 0, "report: {report:?}");
4330
4331 fs::remove_dir_all(root).unwrap();
4332 }
4333
4334 #[test]
4335 fn reconcile_policy_skips_tiny_chunks_before_embedding() {
4336 let (root, config) = markdown_config("tiny\n");
4337 let db = IndexDatabase::rebuild(&config).unwrap();
4338 db.install_model(ai::HASH_MODEL_ID).unwrap();
4339
4340 let plan = db.reconcile_plan().unwrap();
4341 assert_eq!(plan.embeddings.missing, 0);
4342 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4343
4344 let report = db.reconcile(None, Some(8)).unwrap();
4345 assert_eq!(report.embeddings_written, 0);
4346 assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4347 assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 0);
4348
4349 fs::remove_dir_all(root).unwrap();
4350 }
4351
4352 #[test]
4353 fn reconcile_plan_reports_policy_skips_for_fastembed_model() {
4354 let (root, config) = markdown_config("tiny\n");
4355 let db = IndexDatabase::rebuild(&config).unwrap();
4356 db.storage
4357 .connection()
4358 .execute(
4359 "UPDATE ai_models
4360 SET installed = 1, disabled = 0, status = 'Ready', embedding_dim = ?2
4361 WHERE model_id = ?1",
4362 params![
4363 ai::FASTEMBED_MODEL_ID,
4364 i64::try_from(ai::FASTEMBED_EMBEDDING_DIM).unwrap()
4365 ],
4366 )
4367 .unwrap();
4368 db.storage
4369 .connection()
4370 .execute(
4371 "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4372 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4373 [ai::FASTEMBED_MODEL_ID],
4374 )
4375 .unwrap();
4376
4377 let plan = db.reconcile_plan().unwrap();
4378
4379 assert_eq!(plan.embeddings.model_id, ai::FASTEMBED_MODEL_ID);
4380 assert_eq!(plan.embeddings.missing, 0);
4381 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4382
4383 fs::remove_dir_all(root).unwrap();
4384 }
4385
4386 #[cfg(not(feature = "fastembed"))]
4387 #[test]
4388 fn blocked_fastembed_reconcile_still_reports_policy_skips() {
4389 let (root, config) = markdown_config("tiny\n");
4390 let db = IndexDatabase::rebuild(&config).unwrap();
4391 db.storage
4392 .connection()
4393 .execute(
4394 "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4395 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4396 [ai::FASTEMBED_MODEL_ID],
4397 )
4398 .unwrap();
4399
4400 let report = db.reconcile(None, Some(8)).unwrap();
4401
4402 assert_eq!(report.status, "Blocked");
4403 assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4404
4405 fs::remove_dir_all(root).unwrap();
4406 }
4407
4408 #[test]
4409 fn search_explain_reports_weighted_score_components() {
4410 let (root, config) = markdown_config(
4411 "alpha runtime shutdown\nsecond line with enough detail for embedding eligibility and semantic vector scoring\nthird line\n",
4412 );
4413 let db = IndexDatabase::rebuild(&config).unwrap();
4414 db.install_model(ai::HASH_MODEL_ID).unwrap();
4415 db.reconcile(None, Some(8)).unwrap();
4416
4417 let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4418
4419 assert_eq!(hits.len(), 1);
4420 let components = hits[0].score_components.as_ref().unwrap();
4421 let component_sum = components.bm25
4422 + components.vector
4423 + components.symbol
4424 + components.graph
4425 + components.git
4426 + components.github;
4427 assert!((hits[0].score - component_sum).abs() < 0.000_001);
4428 assert!(components.bm25 > 0.0);
4429 assert!(components.vector > 0.0);
4430 assert!(components.vector_note.is_none());
4431 assert!(components.bm25 <= 0.45);
4432 assert!(components.vector <= 0.35);
4433 assert!(components.symbol <= 0.10);
4434 assert!(components.graph <= 0.05);
4435 assert!(components.git <= 0.03);
4436 assert!(components.github <= 0.02);
4437 assert!(db.search("runtime shutdown", 10, false).unwrap()[0].score_components.is_none());
4438
4439 fs::remove_dir_all(root).unwrap();
4440 }
4441
4442 #[test]
4443 fn search_explain_labels_missing_vector_runtime() {
4444 let (root, config) = markdown_config(
4445 "alpha runtime shutdown\nsecond line with enough detail for lexical search without embeddings\nthird line\n",
4446 );
4447 let db = IndexDatabase::rebuild(&config).unwrap();
4448
4449 let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4450
4451 assert_eq!(hits.len(), 1);
4452 let components = hits[0].score_components.as_ref().unwrap();
4453 assert!(components.bm25 > 0.0);
4454 assert_eq!(components.vector, 0.0);
4455 assert_eq!(
4456 components.vector_note.as_deref(),
4457 Some("vector search unavailable: no current embedding model")
4458 );
4459
4460 fs::remove_dir_all(root).unwrap();
4461 }
4462
    /// End-to-end check of git-history indexing on a two-commit repository with
    /// one markdown file and one Rust file. Covers history status counters,
    /// commit search, per-path and per-symbol history, impact-surface
    /// categories, query-to-commit evidence, and chunk blame (called twice).
    #[test]
    fn git_history_indexes_commits_paths_queries_and_blame() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("docs")).unwrap();
        fs::create_dir_all(root.join("src")).unwrap();
        run_git(&root, &["init"]);
        run_git(&root, &["config", "user.name", "Rag Rat"]);
        run_git(&root, &["config", "user.email", "rag@example.com"]);

        // Commit 1 adds both files.
        fs::write(root.join("docs/search.md"), "# Title\nalpha token\n").unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn tracked_symbol() {}\n").unwrap();
        run_git(&root, &["add", "."]);
        run_git(&root, &["commit", "-m", "Add alpha docs"]);

        // Commit 2 rewrites only the markdown file.
        fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
        run_git(&root, &["add", "."]);
        run_git(&root, &["commit", "-m", "Refresh beta docs"]);

        // Two targets: markdown docs under `docs/` and Rust sources under `src/`.
        let config = Config {
            root: root.clone(),
            database: root.join(".rag-rat/index.sqlite"),
            targets: vec![
                ResolvedTarget {
                    name: "markdown".to_string(),
                    language: Language::Markdown,
                    directories: vec![PathBuf::from("docs")],
                    include: vec!["**/*.md".to_string()],
                    exclude: Vec::new(),
                    kind: TargetKind::Docs,
                },
                ResolvedTarget {
                    name: "rust".to_string(),
                    language: Language::Rust,
                    directories: vec![PathBuf::from("src")],
                    include: vec!["**/*.rs".to_string()],
                    exclude: Vec::new(),
                    kind: TargetKind::Source,
                },
            ],
            local_ai: Default::default(),
        };
        let db = IndexDatabase::rebuild(&config).unwrap();
        let status = db.status(&config.database).unwrap();
        assert!(status.git_history.available);
        assert!(status.git_history.head.is_some());
        assert_eq!(status.git_history.indexed_head, status.git_history.head);
        assert_eq!(status.git_history.commit_count, 2);
        // Two files changed in commit 1 plus one in commit 2.
        assert_eq!(status.git_history.file_change_count, 3);

        // "beta" appears only in the second commit's subject.
        let commit_hits = db.commit_search("beta", 10).unwrap();
        assert_eq!(commit_hits.len(), 1);
        assert_eq!(commit_hits[0].subject, "Refresh beta docs");
        assert_eq!(commit_hits[0].evidence_kind, "historical");
        assert!(commit_hits[0].score > 0.0);

        // The markdown file was touched by both commits.
        let path_history = db.git_history_for_path("docs/search.md", 10).unwrap();
        assert_eq!(path_history.len(), 2);
        assert!(path_history.iter().all(|item| item.evidence_kind == "historical"));

        // The Rust file, and so its symbol, was touched only by the first commit.
        let symbol_history =
            db.git_history_for_symbol("tracked_symbol", Some(Language::Rust), 10).unwrap();
        assert_eq!(symbol_history.len(), 1);
        assert_eq!(symbol_history[0].path, "src/lib.rs");
        assert_eq!(symbol_history[0].evidence_kind, "historical");
        // The impact surface must carry both a structural and a historical entry.
        let impact = db.impact_surface("tracked_symbol", 10).unwrap();
        assert!(impact.iter().any(|item| {
            item.category == "Direct structural impact" && item.reason == "exact_symbol_definition"
        }));
        assert!(impact.iter().any(|item| {
            item.category == "Historical/papertrail evidence"
                && item.reason == "git_commit_touched_file"
        }));

        // The query-to-commit hit must list both evidence sources.
        let query_commits = db.commits_touching_query("beta", 10).unwrap();
        let beta_commit =
            query_commits.iter().find(|hit| hit.subject == "Refresh beta docs").unwrap();
        assert!(beta_commit.evidence.iter().any(|value| value == "commit_message"));
        assert!(beta_commit.evidence.iter().any(|value| value == "file_change"));
        assert_eq!(beta_commit.evidence_kind, "historical");

        // Blame for the first chunk: its hash must equal that of the current
        // markdown content, and the per-commit line counts must sum to two lines.
        let chunk_id = first_chunk_id(&db);
        let blame = db.git_blame_chunk(chunk_id).unwrap().unwrap();
        assert_eq!(blame.source_text_hash, hex_sha256("# Title\nbeta token\n".as_bytes()));
        assert_eq!(blame.line_count, 2);
        assert_eq!(blame.commit_counts.values().sum::<i64>(), 2);
        assert!(blame.dominant_commit_lines >= 1);
        assert!(blame.dominant_commit.is_some());
        assert_eq!(blame.evidence_kind, "historical");
        // A second call must return the same hash (presumably served from a
        // cache, going by the variable name; not verifiable from this test).
        let cached = db.git_blame_chunk(chunk_id).unwrap().unwrap();
        assert_eq!(cached.source_text_hash, blame.source_text_hash);

        fs::remove_dir_all(root).unwrap();
    }
4557
4558 #[test]
4559 fn indexes_rust_graph_edges_from_tree_sitter() {
4560 let root = unique_temp_root();
4561 let _ = fs::remove_dir_all(&root);
4562 fs::create_dir_all(root.join("src")).unwrap();
4563 fs::write(
4564 root.join("src/lib.rs"),
4565 r#"
4566use crate::worker::Worker;
4567mod worker;
4568
4569trait Service {
4570 fn serve(&self);
4571}
4572
4573struct Worker;
4574
4575impl Service for Worker {
4576 fn serve(&self) {
4577 helper();
4578 }
4579}
4580
4581fn helper() {}
4582
4583fn caller() {
4584 helper();
4585 Worker.serve();
4586}
4587"#,
4588 )
4589 .unwrap();
4590 let config = source_config(root.clone(), Language::Rust);
4591 let db = IndexDatabase::rebuild(&config).unwrap();
4592
4593 assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
4594 assert_edge(&db, "Worker", "Service", "implements", "Syntactic");
4595 assert_edge(&db, "src/lib.rs", "worker", "imports", "Syntactic");
4596 let callers = db.find_callers("helper", 10).unwrap();
4597 assert!(
4598 callers.iter().any(|edge| {
4599 edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
4600 && edge.edge_kind == "calls_name"
4601 }),
4602 "helper callers: {callers:?}"
4603 );
4604
4605 fs::remove_dir_all(root).unwrap();
4606 }
4607
4608 #[test]
4609 fn ffi_surface_labels_exported_impl_members_separately() {
4610 let root = unique_temp_root();
4611 let _ = fs::remove_dir_all(&root);
4612 fs::create_dir_all(root.join("src")).unwrap();
4613 fs::write(
4614 root.join("src/lib.rs"),
4615 r#"
4616pub struct PhraseRepo;
4617
4618#[uniffi::export]
4619impl PhraseRepo {
4620 pub fn children(&self) {}
4621 pub fn journal(&self) {}
4622}
4623
4624#[cfg_attr(not(target_arch = "wasm32"), uniffi::export(async_runtime = "tokio"))]
4625impl Runtime {
4626 pub fn route_search_query(&self) {}
4627}
4628
4629pub struct Runtime;
4630
4631/// Not #[uniffi::export]: this is an internal helper.
4632pub fn internal_helper() {}
4633
4634#[cfg_attr(target_arch = "wasm32", ::uniffi::export)]
4635pub fn exported_fn() {}
4636"#,
4637 )
4638 .unwrap();
4639 let config = source_config(root.clone(), Language::Rust);
4640 let db = IndexDatabase::rebuild(&config).unwrap();
4641
4642 let surface = db.ffi_surface(20).unwrap();
4643 assert!(
4644 surface.iter().any(|item| {
4645 item.reason == "rust_uniffi_export"
4646 && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("exported_fn"))
4647 }),
4648 "direct export should remain direct: {surface:?}"
4649 );
4650 assert!(
4651 surface.iter().any(|item| item.reason == "rust_uniffi_exported_impl"),
4652 "exported impl/type surface should be explicit: {surface:?}"
4653 );
4654 assert!(
4655 surface.iter().any(|item| {
4656 item.reason == "rust_uniffi_impl_member"
4657 && item
4658 .symbol
4659 .as_deref()
4660 .is_some_and(|symbol| symbol.ends_with("route_search_query"))
4661 }),
4662 "cfg_attr exported impl member should be labeled separately: {surface:?}"
4663 );
4664 assert!(
4665 surface.iter().any(|item| {
4666 item.reason == "rust_uniffi_impl_member"
4667 && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("children"))
4668 }),
4669 "impl member should be labeled separately: {surface:?}"
4670 );
4671 assert!(
4672 !surface.iter().any(|item| {
4673 item.reason == "rust_uniffi_export"
4674 && item.symbol.as_deref().is_some_and(|symbol| {
4675 symbol.ends_with("children") || symbol.ends_with("journal")
4676 })
4677 }),
4678 "impl members must not be reported as direct exports: {surface:?}"
4679 );
4680 assert!(
4681 !surface.iter().any(|item| {
4682 item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("internal_helper"))
4683 }),
4684 "comment-only UniFFI mentions must not create FFI surface rows: {surface:?}"
4685 );
4686
4687 fs::remove_dir_all(root).unwrap();
4688 }
4689
4690 #[test]
4691 fn search_and_read_chunk_attach_bounded_graph_evidence() {
4692 let root = unique_temp_root();
4693 let _ = fs::remove_dir_all(&root);
4694 fs::create_dir_all(root.join("src")).unwrap();
4695 fs::write(
4696 root.join("src/lib.rs"),
4697 "pub fn helper() {}\n\npub fn caller() {\n helper();\n}\n",
4698 )
4699 .unwrap();
4700 let config = source_config(root.clone(), Language::Rust);
4701 let db = IndexDatabase::rebuild(&config).unwrap();
4702
4703 let hits = db.search("helper caller", 10, false).unwrap();
4704 let helper_hit = hits
4705 .iter()
4706 .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with("helper")))
4707 .expect("helper search hit");
4708 let helper_graph = helper_hit.graph.as_ref().expect("helper graph evidence");
4709 assert_eq!(helper_graph.caller_count, 1);
4710 assert!(helper_graph.top_callers.iter().any(|caller| {
4711 caller.symbol_path.ends_with("caller")
4712 && caller.callsite.line == 4
4713 && caller.callsite.span == [4, 4]
4714 && caller.confidence == "syntactic"
4715 }));
4716 assert!(helper_graph.callers.is_empty(), "search keeps graph compact");
4717
4718 let caller_hit = hits
4719 .iter()
4720 .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with("caller")))
4721 .expect("caller search hit");
4722 let caller_graph = caller_hit.graph.as_ref().expect("caller graph evidence");
4723 assert!(caller_graph.top_callees.iter().any(|callee| {
4724 callee.target == "helper"
4725 && callee.callsite.line == 4
4726 && callee.callsite.span == [4, 4]
4727 && callee.confidence == "syntactic"
4728 }));
4729
4730 let chunk = db.read_chunk(caller_hit.chunk_id).unwrap().expect("caller chunk");
4731 let full_graph = chunk.graph.as_ref().expect("full read_chunk graph");
4732 assert!(full_graph.symbol.as_ref().is_some_and(|symbol| symbol.name == "caller"));
4733 assert!(
4734 full_graph
4735 .callees
4736 .iter()
4737 .any(|callee| callee.target == "helper" && callee.callsite.line == 4)
4738 );
4739 assert!(full_graph.notes.iter().any(|note| note.contains("tree-sitter/syntactic")));
4740
4741 fs::remove_dir_all(root).unwrap();
4742 }
4743
4744 #[test]
4745 fn graph_exact_mode_requires_verified_symbol_identity() {
4746 let root = unique_temp_root();
4747 let _ = fs::remove_dir_all(&root);
4748 fs::create_dir_all(root.join("src")).unwrap();
4749 fs::write(
4750 root.join("src/lib.rs"),
4751 "pub fn helper() {}\n\npub fn caller() {\n helper();\n}\n",
4752 )
4753 .unwrap();
4754 let config = source_config(root.clone(), Language::Rust);
4755 let db = IndexDatabase::rebuild(&config).unwrap();
4756 let helper = db.symbols("helper", Some(Language::Rust), 10).unwrap().remove(0);
4757 let caller = db.symbols("caller", Some(Language::Rust), 10).unwrap().remove(0);
4758
4759 let bare_exact = db
4760 .find_callers_with_options(
4761 "helper",
4762 10,
4763 &crate::query::graph::GraphTraversalOptions {
4764 resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4765 ..Default::default()
4766 },
4767 )
4768 .unwrap();
4769 assert!(bare_exact.is_empty(), "bare exact lookup should not fall back: {bare_exact:?}");
4770
4771 let exact_callers = db
4772 .find_callers_with_options(
4773 "helper",
4774 10,
4775 &crate::query::graph::GraphTraversalOptions {
4776 resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4777 symbol_id: Some(helper.symbol_id),
4778 ..Default::default()
4779 },
4780 )
4781 .unwrap();
4782 assert!(
4783 exact_callers.iter().any(|edge| {
4784 edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
4785 && edge.verified_target_symbol
4786 }),
4787 "exact callers: {exact_callers:?}"
4788 );
4789 assert!(exact_callers.iter().all(|edge| edge.verified_target_symbol));
4790
4791 let exact_callees = db
4792 .trace_callees_with_options(
4793 "caller",
4794 10,
4795 &crate::query::graph::GraphTraversalOptions {
4796 resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4797 symbol_id: Some(caller.symbol_id),
4798 ..Default::default()
4799 },
4800 )
4801 .unwrap();
4802 assert!(
4803 exact_callees.iter().any(|edge| {
4804 edge.target.as_deref() == Some("helper") && edge.verified_target_symbol
4805 }),
4806 "exact callees: {exact_callees:?}"
4807 );
4808 assert!(exact_callees.iter().all(|edge| edge.verified_target_symbol));
4809
4810 fs::remove_dir_all(root).unwrap();
4811 }
4812
4813 #[test]
4814 fn symbol_lookup_ranks_type_definitions_before_impl_blocks() {
4815 let root = unique_temp_root();
4816 let _ = fs::remove_dir_all(&root);
4817 fs::create_dir_all(root.join("src")).unwrap();
4818 fs::write(
4819 root.join("src/lib.rs"),
4820 r#"
4821impl Database {
4822 pub fn open() -> Self {
4823 Database
4824 }
4825}
4826
4827pub struct Database;
4828"#,
4829 )
4830 .unwrap();
4831 let config = source_config(root.clone(), Language::Rust);
4832 let db = IndexDatabase::rebuild(&config).unwrap();
4833 let hits = db.symbols("Database", Some(Language::Rust), 10).unwrap();
4834 assert!(hits.len() >= 2, "fixture should expose both impl and struct symbols: {hits:?}");
4835 assert_eq!(hits[0].kind, "struct", "Database lookup should prefer type definition");
4836 assert!(
4837 hits.iter().any(|hit| hit.kind == "impl"),
4838 "impl Database should still be available after the struct: {hits:?}"
4839 );
4840
4841 fs::remove_dir_all(root).unwrap();
4842 }
4843
/// Verifies that exact-mode graph queries cover every `cfg` variant of a duplicated symbol.
///
/// The fixture defines `spawn_blocking` twice behind opposite `target_arch` cfgs and calls
/// it once from `caller`. The test asserts that the first candidate reports two logical
/// variants grouped as `cfg_variant`, that an exact caller lookup pinned to the second
/// candidate's `symbol_id` still returns the verified edge from `caller`, and that the
/// traversal report for the first candidate lists both variants with only verified edges.
#[test]
fn logical_symbol_exact_mode_covers_duplicate_rust_variants() {
    use crate::query::{
        graph::{GraphResolutionMode, GraphTraversalOptions},
        symbol::SymbolSelector,
    };

    const FIXTURE: &str = r#"
#[cfg(not(target_arch = "wasm32"))]
pub fn spawn_blocking() {}

#[cfg(target_arch = "wasm32")]
pub fn spawn_blocking() {}

pub fn caller() {
    spawn_blocking();
}
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), FIXTURE).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let selector = SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("spawn_blocking".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: true,
        limit: 10,
    };
    let lookup = db.symbol_candidates(&selector).unwrap();
    let primary = &lookup.candidates[0];
    let logical_symbol_id = primary.logical_symbol_id.expect("logical id");
    assert_eq!(primary.logical_variant_count, Some(2));
    assert_eq!(primary.logical_group_reason.as_deref(), Some("cfg_variant"));

    // Pin the lookup to the second variant's concrete symbol id.
    let second_variant_options = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(lookup.candidates[1].symbol_id),
        ..Default::default()
    };
    let exact_variant_callers =
        db.find_callers_with_options("spawn_blocking", 10, &second_variant_options).unwrap();
    let variant_lookup_finds_caller = exact_variant_callers.iter().any(|candidate_edge| {
        let from_caller = candidate_edge
            .from_symbol
            .as_deref()
            .is_some_and(|symbol| symbol.ends_with("caller"));
        from_caller
            && candidate_edge.target.as_deref() == Some("spawn_blocking")
            && candidate_edge.verified_target_symbol
    });
    assert!(
        variant_lookup_finds_caller,
        "symbol_id exact should include its logical cfg group: {exact_variant_callers:?}"
    );
    assert!(exact_variant_callers.iter().all(|edge| edge.verified_target_symbol));

    // Ask for the full traversal report, this time keyed on the first variant.
    let first_variant_options = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(primary.symbol_id),
        ..Default::default()
    };
    let exact_logical = db
        .graph_traversal_report("find_callers", primary, true, 10, &first_variant_options)
        .unwrap();
    assert_eq!(exact_logical.query.logical_symbol_id, Some(logical_symbol_id));
    assert_eq!(
        exact_logical.logical_symbol.as_ref().map(|symbol| symbol.variant_count),
        Some(2)
    );
    assert_eq!(exact_logical.variants.len(), 2);
    assert!(exact_logical.results.iter().all(|edge| edge.verified_target_symbol));
    let report_finds_caller = exact_logical.results.iter().any(|candidate_edge| {
        candidate_edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
            && candidate_edge.target.as_deref() == Some("spawn_blocking")
    });
    assert!(report_finds_caller, "logical exact callers: {exact_logical:?}");

    fs::remove_dir_all(root).unwrap();
}
4932
/// Indexes the `graph-realworld/rust` fixture and checks the graph edges it produces.
///
/// A table of `(from, to, edge kind, confidence)` rows is checked through `assert_edge`.
/// The test then asserts that the default caller lookup for `serve` is empty, while a
/// fuzzy lookup returns a `calls_name` edge originating from `drive`.
#[test]
fn indexes_real_world_rust_graph_patterns() {
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};

    let root = fixture_temp_root("graph-realworld/rust");
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // (from, to, edge kind, confidence) — checked in this order.
    let expected_edges = [
        ("src/lib.rs", "worker", "imports", "Syntactic"),
        ("src/lib.rs", "Worker", "exports", "Syntactic"),
        ("entry", "new", "calls_name", "NameOnly"),
        ("entry", "Client", "references_type", "Syntactic"),
        ("drive", "serve", "calls_name", "NameOnly"),
        ("drive", "GenericRunner", "references_type", "Syntactic"),
        ("Worker", "Service", "implements", "Syntactic"),
        ("generic_call", "T", "references_type", "NameOnly"),
        ("entry", "generated_call", "uses_macro", "NameOnly"),
    ];
    for (from, to, edge_kind, confidence) in expected_edges {
        assert_edge(&db, from, to, edge_kind, confidence);
    }

    let syntactic_callers = db.find_callers("serve", 10).unwrap();
    assert!(
        syntactic_callers.is_empty(),
        "syntactic serve callers should avoid receiver/name fallback: {syntactic_callers:?}"
    );

    let fuzzy_options = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Fuzzy,
        ..Default::default()
    };
    let callers = db.find_callers_with_options("serve", 10, &fuzzy_options).unwrap();
    let drive_calls_serve = callers.iter().any(|candidate_edge| {
        candidate_edge.edge_kind == "calls_name"
            && candidate_edge.edge_confidence == candidate_edge.confidence
            && candidate_edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("drive"))
    });
    assert!(drive_calls_serve, "serve callers: {callers:?}");

    fs::remove_dir_all(root).unwrap();
}
4974
/// Indexes a two-file TypeScript/TSX fixture and checks call, type-reference, import and
/// export edges.
///
/// `src/App.tsx` imports `helper` and `Card` from `src/helper.ts`, calls `helper`, renders
/// `<Card />`, and exposes `callRun`, which calls `run`. The final assertion checks that
/// tracing callees of `callRun` yields a `Syntactic` edge to `run`.
#[test]
fn indexes_typescript_graph_edges_from_tree_sitter() {
    const HELPER_TS: &str = "export function helper() {}\nexport const Card = () => null;\n";
    const APP_TSX: &str = r#"
import { helper, Card } from "./helper";

export function run() {
    helper();
    return <Card />;
}

export const callRun = () => run();
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/helper.ts"), HELPER_TS).unwrap();
    fs::write(root.join("src/App.tsx"), APP_TSX).unwrap();
    let config = source_config(root.clone(), Language::TypeScript);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // (from, to, edge kind, confidence) — checked in this order.
    let expected_edges = [
        ("run", "helper", "calls_name", "Syntactic"),
        ("run", "Card", "references_type", "Syntactic"),
        ("src/App.tsx", "helper", "imports", "Syntactic"),
        ("src/App.tsx", "run", "exports", "Syntactic"),
    ];
    for (from, to, edge_kind, confidence) in expected_edges {
        assert_edge(&db, from, to, edge_kind, confidence);
    }

    let callees = db.trace_callees("callRun", 10).unwrap();
    let call_run_reaches_run = callees.iter().any(|candidate_edge| {
        candidate_edge.to_symbol.as_deref().is_some_and(|name| name.ends_with("run"))
            && candidate_edge.confidence == "Syntactic"
    });
    assert!(call_run_reaches_run, "callRun callees: {callees:?}");

    fs::remove_dir_all(root).unwrap();
}
5017
/// Indexes a small C translation unit and checks that the call from `runtime_open` to
/// `helper` is recorded as a `Syntactic` `calls_name` edge.
#[test]
fn indexes_c_graph_edges_from_tree_sitter() {
    const RUNTIME_C: &str = r#"
typedef struct Runtime Runtime;

struct Runtime {
    int state;
};

int helper(Runtime *runtime) {
    return runtime->state;
}

int runtime_open(Runtime *runtime) {
    return helper(runtime);
}
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let source_dir = root.join("src");
    fs::create_dir_all(&source_dir).unwrap();
    fs::write(source_dir.join("runtime.c"), RUNTIME_C).unwrap();

    let config = source_config(root.clone(), Language::C);
    let db = IndexDatabase::rebuild(&config).unwrap();

    assert_edge(&db, "runtime_open", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(root).unwrap();
}
5049
/// Checks that file-scope macro invocations in a C driver are reachable through search.
///
/// The fixture contains a `DEVICE_API(...)` initializer and a `DEVICE_DT_INST_DEFINE(...)`
/// invocation at file scope. After indexing `drivers/entropy` as a C source target, a
/// search for `DEVICE_API` must return a hit in `drivers/entropy/entropy.c` whose summary
/// mentions `DEVICE_API`.
#[test]
fn indexes_c_file_scope_macro_regions_for_search() {
    const ENTROPY_C: &str = r#"
static int entropy_init(const struct device *dev)
{
    ARG_UNUSED(dev);
    return 0;
}

/* Entropy driver APIs structure */
static DEVICE_API(entropy, entropy_cryptoacc_trng_api) = {
    .get_entropy = entropy_cryptoacc_trng_get_entropy,
};

DEVICE_DT_INST_DEFINE(0, entropy_init, NULL, NULL, NULL,
                      PRE_KERNEL_1, CONFIG_ENTROPY_INIT_PRIORITY,
                      &entropy_cryptoacc_trng_api);
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("drivers/entropy")).unwrap();
    fs::write(root.join("drivers/entropy/entropy.c"), ENTROPY_C).unwrap();

    let c_target = ResolvedTarget {
        name: "c".to_string(),
        language: Language::C,
        directories: vec![PathBuf::from("drivers/entropy")],
        include: vec!["**/*.c".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![c_target],
        local_ai: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    let hits = db.search("DEVICE_API", 5, false).unwrap();
    let macro_region_found = hits.iter().any(|candidate| {
        candidate.path == "drivers/entropy/entropy.c" && candidate.summary.contains("DEVICE_API")
    });
    assert!(macro_region_found, "DEVICE_API hits: {hits:?}");

    fs::remove_dir_all(root).unwrap();
}
5100
/// Indexes a namespaced C++ fixture and checks that the out-of-line `Runtime::open`
/// definition produces a `Syntactic` `calls_name` edge from `open` to `helper`.
#[test]
fn indexes_cpp_graph_edges_from_tree_sitter() {
    const RUNTIME_CPP: &str = r#"
namespace held {
class Runtime {
public:
    void open();
};

void helper() {}

void Runtime::open() {
    helper();
}
}
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let source_dir = root.join("src");
    fs::create_dir_all(&source_dir).unwrap();
    fs::write(source_dir.join("runtime.cpp"), RUNTIME_CPP).unwrap();

    let config = source_config(root.clone(), Language::Cpp);
    let db = IndexDatabase::rebuild(&config).unwrap();

    assert_edge(&db, "open", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(root).unwrap();
}
5131
/// Indexes the `graph-realworld/typescript` fixture and checks the graph edges it produces.
///
/// A table of `(from, to, edge kind, confidence)` rows is checked through `assert_edge`.
/// The test then traces callees of `Shell` with references included and expects a
/// `references_type` edge that resolves to `DefaultWidget`.
#[test]
fn indexes_real_world_typescript_graph_patterns() {
    use crate::query::graph::GraphTraversalOptions;

    let root = fixture_temp_root("graph-realworld/typescript");
    let config = source_config(root.clone(), Language::TypeScript);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // (from, to, edge kind, confidence) — checked in this order.
    let expected_edges = [
        ("src/lib.tsx", "DefaultWidget", "imports", "Syntactic"),
        ("src/lib.tsx", "WidgetNS", "imports", "NameOnly"),
        ("src/lib.tsx", "WidgetProps", "imports", "Syntactic"),
        ("src/lib.tsx", "ReExportedWidget", "exports", "NameOnly"),
        ("useWidget", "useMemo", "calls_name", "NameOnly"),
        ("useWidget", "DefaultWidget", "calls_name", "Syntactic"),
        ("Shell", "renderWidget", "calls_name", "NameOnly"),
        ("Shell", "WidgetNS", "references_type", "NameOnly"),
        ("Shell", "DefaultWidget", "references_type", "Syntactic"),
        ("DefaultWidget", "WidgetProps", "references_type", "Syntactic"),
    ];
    for (from, to, edge_kind, confidence) in expected_edges {
        assert_edge(&db, from, to, edge_kind, confidence);
    }

    let reference_options = GraphTraversalOptions {
        include_references: true,
        edge_kinds: None,
        ..Default::default()
    };
    let callees = db.trace_callees_with_options("Shell", 10, &reference_options).unwrap();
    let shell_references_widget = callees.iter().any(|candidate_edge| {
        candidate_edge.edge_kind == "references_type"
            && candidate_edge.edge_confidence == candidate_edge.confidence
            && candidate_edge
                .to_symbol
                .as_deref()
                .is_some_and(|name| name.ends_with("DefaultWidget"))
    });
    assert!(shell_references_widget, "Shell callees: {callees:?}");

    fs::remove_dir_all(root).unwrap();
}
5170
/// Checks that a `format!` invocation is not resolved to a module that is also named
/// `format`.
///
/// The fixture declares `mod format;` and uses `format!` inside `execute_one`. The stored
/// `uses_macro` edge for `format` must have no target symbol id, `NameOnly` confidence,
/// `unresolved` resolution, and evidence that mentions `format!`.
#[test]
fn rust_macro_edges_do_not_resolve_to_same_named_modules() {
    const LIB_RS: &str = r#"
mod format;

fn execute_one() {
    let _value = format!("hello");
}
"#;
    const MACRO_EDGE_SQL: &str = "
            SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
            FROM edges
            WHERE edge_kind = 'uses_macro'
              AND to_name = 'format'
            ";

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    fs::write(root.join("src/format.rs"), "pub fn helper() {}\n").unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Columns, in order: edge_kind, to_name, to_symbol_id, confidence, resolution, evidence.
    let edge: (String, String, Option<i64>, String, String, Option<String>) = db
        .storage
        .connection()
        .query_row(MACRO_EDGE_SQL, [], |row| {
            Ok((
                row.get(0)?,
                row.get(1)?,
                row.get(2)?,
                row.get(3)?,
                row.get(4)?,
                row.get(5)?,
            ))
        })
        .unwrap();
    assert_eq!(edge.0, "uses_macro");
    assert_eq!(edge.1, "format");
    assert_eq!(edge.2, None);
    assert_eq!(edge.3, "NameOnly");
    assert_eq!(edge.4, "unresolved");
    assert!(edge.5.as_deref().is_some_and(|value| value.contains("format!")));

    fs::remove_dir_all(root).unwrap();
}
5223
/// Checks that reopening a database with an outdated `graph_index_version` repairs edges
/// that were stored under an older policy.
///
/// After a normal rebuild the test overwrites `graph_index_version` with `'old'` and
/// rewrites the `format` edge into a resolved `calls_name` edge. It then drops the handle,
/// reopens the database, and expects the edge to be an unresolved `NameOnly` `uses_macro`
/// edge with `format!` evidence again.
#[test]
fn opening_old_graph_policy_rebuilds_stale_macro_edges() {
    const LIB_RS: &str = r#"
mod format;

fn execute_one() {
    let _value = format!("hello");
}
"#;
    const MARK_VERSION_OLD_SQL: &str =
        "UPDATE index_meta SET value = 'old' WHERE key = 'graph_index_version'";
    const CORRUPT_EDGE_SQL: &str = "
            UPDATE edges
            SET edge_kind = 'calls_name',
                to_symbol_id = (SELECT id FROM symbols WHERE name = 'format' LIMIT 1),
                confidence = 'Syntactic',
                evidence = NULL,
                resolution = 'syntactic'
            WHERE to_name = 'format'
            ";
    const REPAIRED_EDGE_SQL: &str = "
            SELECT edge_kind, to_symbol_id, confidence, resolution, evidence
            FROM edges
            WHERE to_name = 'format'
              AND edge_kind = 'uses_macro'
            ";

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    fs::write(root.join("src/format.rs"), "pub fn helper() {}\n").unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Simulate a database written by an older graph policy.
    db.storage.connection().execute(MARK_VERSION_OLD_SQL, []).unwrap();
    db.storage.connection().execute(CORRUPT_EDGE_SQL, []).unwrap();
    drop(db);

    let reopened = IndexDatabase::open(&config.database).unwrap();
    // Columns, in order: edge_kind, to_symbol_id, confidence, resolution, evidence.
    let edge: (String, Option<i64>, String, String, Option<String>) = reopened
        .storage
        .connection()
        .query_row(REPAIRED_EDGE_SQL, [], |row| {
            Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?))
        })
        .unwrap();
    assert_eq!(edge.0, "uses_macro");
    assert_eq!(edge.1, None);
    assert_eq!(edge.2, "NameOnly");
    assert_eq!(edge.3, "unresolved");
    assert!(edge.4.as_deref().is_some_and(|value| value.contains("format!")));

    fs::remove_dir_all(root).unwrap();
}
5295
/// Checks that `Vec::new()` is not bound to a local `AlertsStore::new` just because the
/// short name matches.
///
/// The `calls_name` edge from `caller` to `new` must keep `Vec::new` as its qualified
/// target, carry no target symbol id, and be stored as `NameOnly` / `unresolved`.
#[test]
fn qualified_common_member_calls_do_not_resolve_by_short_name() {
    const LIB_RS: &str = r#"
pub struct AlertsStore;

impl AlertsStore {
    pub fn new() -> Self {
        Self
    }
}

pub fn caller() {
    let _items: Vec<String> = Vec::new();
}
"#;
    const NEW_CALL_SQL: &str = "
            SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution
            FROM edges
            WHERE from_name LIKE '%caller'
              AND edge_kind = 'calls_name'
              AND to_name = 'new'
            ";

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Columns, in order: to_name, target_qualified_name, to_symbol_id, confidence,
    // resolution.
    let edge: (String, Option<String>, Option<i64>, String, String) = db
        .storage
        .connection()
        .query_row(NEW_CALL_SQL, [], |row| {
            Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?, row.get(4)?))
        })
        .unwrap();
    assert_eq!(edge.0, "new");
    assert_eq!(edge.1.as_deref(), Some("Vec::new"));
    assert_eq!(edge.2, None);
    assert_eq!(edge.3, "NameOnly");
    assert_eq!(edge.4, "unresolved");

    fs::remove_dir_all(root).unwrap();
}
5352
/// Checks that a Rust `json!` invocation is not resolved to a TypeScript function named
/// `json` that is indexed alongside it.
///
/// The config gets a second, TypeScript target over the same `src` directory. The stored
/// `uses_macro` edge for `json` must have no target symbol id, `NameOnly` confidence,
/// `unresolved` resolution, and evidence that mentions `json!`.
#[test]
fn macro_edges_do_not_resolve_to_same_named_typescript_symbols() {
    const LIB_RS: &str = r#"
fn rust_entry() {
    let _payload = json!({"ok": true});
}
"#;
    const MACRO_EDGE_SQL: &str = "
            SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
            FROM edges
            WHERE edge_kind = 'uses_macro'
              AND to_name = 'json'
            ";

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    fs::write(root.join("src/preferences.ts"), "export function json() { return {}; }\n")
        .unwrap();

    let mut config = source_config(root.clone(), Language::Rust);
    let typescript_target = ResolvedTarget {
        name: "typescript".to_string(),
        language: Language::TypeScript,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.ts".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    config.targets.push(typescript_target);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Columns, in order: edge_kind, to_name, to_symbol_id, confidence, resolution, evidence.
    let edge: (String, String, Option<i64>, String, String, Option<String>) = db
        .storage
        .connection()
        .query_row(MACRO_EDGE_SQL, [], |row| {
            Ok((
                row.get(0)?,
                row.get(1)?,
                row.get(2)?,
                row.get(3)?,
                row.get(4)?,
                row.get(5)?,
            ))
        })
        .unwrap();
    assert_eq!(edge.0, "uses_macro");
    assert_eq!(edge.1, "json");
    assert_eq!(edge.2, None);
    assert_eq!(edge.3, "NameOnly");
    assert_eq!(edge.4, "unresolved");
    assert!(edge.5.as_deref().is_some_and(|value| value.contains("json!")));

    fs::remove_dir_all(root).unwrap();
}
5412
/// Checks that path-qualified calls to a crate-local helper show up in caller lookups.
///
/// `first` calls `crate::task_spawn::spawn_blocking()` and `second` calls
/// `task_spawn::spawn_blocking()`. Both must appear as `calls_name` callers of
/// `spawn_blocking`; for `first` the edge resolution must be `target_name_fallback`.
#[test]
fn qualified_crate_helper_callers_use_name_fallback() {
    const LIB_RS: &str = r#"
pub mod task_spawn {
    pub fn spawn_blocking() {}
}

pub fn first() {
    crate::task_spawn::spawn_blocking();
}

pub fn second() {
    task_spawn::spawn_blocking();
}
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let callers = db.find_callers("spawn_blocking", 10).unwrap();

    let first_uses_fallback = callers.iter().any(|candidate_edge| {
        candidate_edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("first"))
            && candidate_edge.edge_kind == "calls_name"
            && candidate_edge.resolution == "target_name_fallback"
    });
    assert!(first_uses_fallback, "spawn_blocking callers: {callers:?}");

    let second_is_listed = callers.iter().any(|candidate_edge| {
        candidate_edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("second"))
            && candidate_edge.edge_kind == "calls_name"
    });
    assert!(second_is_listed, "spawn_blocking callers: {callers:?}");

    fs::remove_dir_all(root).unwrap();
}
5457
/// Checks that caller lookups for `spawn_blocking` match that name only.
///
/// The fixture has sibling functions whose names merely contain or prefix the target
/// (`spawn`, `spawn_blocking_handle`, `spawn_blocking_offload`) plus a chained `.map_err()`
/// call. Both the bare lookup and the `src/lib.rs::spawn_blocking` lookup must include the
/// `direct` caller and must not include any of the related callers or related targets.
#[test]
fn caller_lookup_does_not_match_related_names_or_chain_evidence() {
    const LIB_RS: &str = r#"
pub mod runtime {
    pub mod task_spawn {
        pub fn spawn() {}
        pub fn spawn_blocking() -> JoinHandle {
            JoinHandle
        }
        pub fn spawn_blocking_handle() {}
        pub fn spawn_blocking_offload() -> JoinHandle {
            JoinHandle
        }
    }
}

pub struct JoinHandle;

impl JoinHandle {
    pub fn map_err(self) {}
}

pub fn direct() {
    crate::runtime::task_spawn::spawn_blocking();
}

pub fn related_handle() {
    crate::runtime::task_spawn::spawn_blocking_handle();
}

pub fn related_offload_chain() {
    crate::runtime::task_spawn::spawn_blocking_offload().map_err();
}

pub fn related_spawn_with_text() {
    crate::runtime::task_spawn::spawn();
}
"#;
    // Callers and targets that must never appear in a `spawn_blocking` caller lookup.
    const RELATED_CALLERS: [&str; 3] =
        ["related_handle", "related_offload_chain", "related_spawn_with_text"];
    const RELATED_TARGETS: [&str; 4] =
        ["spawn_blocking_handle", "spawn_blocking_offload", "spawn", "map_err"];

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let callers = db.find_callers("spawn_blocking", 20).unwrap();
    let direct_is_listed = callers.iter().any(|candidate_edge| {
        candidate_edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
            && candidate_edge.target.as_deref() == Some("spawn_blocking")
            && candidate_edge.edge_kind == "calls_name"
    });
    assert!(direct_is_listed, "spawn_blocking callers: {callers:?}");
    let leaked = callers.iter().any(|candidate_edge| {
        let from_related_caller = candidate_edge.from_symbol.as_deref().is_some_and(|name| {
            RELATED_CALLERS.iter().any(|suffix| name.ends_with(*suffix))
        });
        let to_related_target = candidate_edge
            .target
            .as_deref()
            .is_some_and(|target| RELATED_TARGETS.contains(&target));
        from_related_caller || to_related_target
    });
    assert!(!leaked, "caller lookup leaked related names or chain evidence: {callers:?}");

    let qualified_callers = db.find_callers("src/lib.rs::spawn_blocking", 20).unwrap();
    let qualified_direct_is_listed = qualified_callers.iter().any(|candidate_edge| {
        candidate_edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
            && candidate_edge.target.as_deref() == Some("spawn_blocking")
            && candidate_edge.edge_kind == "calls_name"
    });
    assert!(
        qualified_direct_is_listed,
        "qualified spawn_blocking callers: {qualified_callers:?}"
    );
    let qualified_leaked = qualified_callers.iter().any(|candidate_edge| {
        let from_related_caller = candidate_edge.from_symbol.as_deref().is_some_and(|name| {
            RELATED_CALLERS.iter().any(|suffix| name.ends_with(*suffix))
        });
        let to_related_target = candidate_edge
            .target
            .as_deref()
            .is_some_and(|target| RELATED_TARGETS.contains(&target));
        from_related_caller || to_related_target
    });
    assert!(
        !qualified_leaked,
        "qualified caller lookup leaked related names or chain evidence: {qualified_callers:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
5554
/// Checks that a large source file still yields symbols and edges from its tail.
///
/// The fixture pads `src/lib.rs` with 700 filler functions (the test asserts the file is
/// larger than 10,000 bytes) before defining `caller`. The `caller` symbol, a `calls_name`
/// edge to `spawn_blocking` with a callsite past line 700, and a `direct_caller` impact
/// entry must all be present.
#[test]
fn files_past_the_old_structural_cap_still_contribute_symbols_and_edges() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();

    // One filler function per line pushes `caller` beyond line 700.
    let mut filler = String::new();
    for idx in 0..700 {
        filler.push_str(&format!("pub fn filler_{idx}() {{}}\n"));
    }
    let source = format!(
        r#"
pub mod task_spawn {{
    pub fn spawn_blocking() {{}}
}}

{filler}

pub fn caller() {{
    crate::task_spawn::spawn_blocking();
}}
"#
    );
    fs::write(root.join("src/lib.rs"), source).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    assert!(fs::metadata(root.join("src/lib.rs")).unwrap().len() > 10_000);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
    let caller_is_indexed = symbols.iter().any(|candidate| candidate.name == "caller");
    assert!(caller_is_indexed, "caller symbols: {symbols:?}");

    let callers = db.find_callers("spawn_blocking", 10).unwrap();
    let late_callsite_found = callers.iter().any(|candidate_edge| {
        candidate_edge.edge_kind == "calls_name"
            && candidate_edge.target.as_deref() == Some("spawn_blocking")
            && candidate_edge.callsite.as_ref().is_some_and(|callsite| callsite.line > 700)
    });
    assert!(late_callsite_found, "spawn_blocking callers: {callers:?}");

    let impact =
        db.impact_surface("callers of crate::task_spawn::spawn_blocking in src", 10).unwrap();
    let has_direct_caller = impact.iter().any(|entry| {
        entry.category == "Direct structural impact" && entry.reason == "direct_caller"
    });
    assert!(has_direct_caller, "impact: {impact:?}");

    fs::remove_dir_all(root).unwrap();
}
5608
/// Checks how `impact_surface` turns a free-text query into impact entries.
///
/// For a query that mentions `runtime`, `task_spawn` and `spawn_blocking`, the result must
/// list `caller` as a `direct_caller`, must not report `runtime` as an
/// `exact_symbol_definition`, and must not include `references_type` evidence or the
/// `Send` symbol.
#[test]
fn impact_surface_uses_high_signal_query_symbols_and_call_edges() {
    const LIB_RS: &str = r#"
pub mod runtime {
    pub fn unrelated_runtime_symbol() {}
}

pub mod task_spawn {
    pub fn spawn_blocking<F, T>(f: F) -> T
    where
        F: FnOnce() -> T + Send + 'static,
        T: Send + 'static,
    {
        f()
    }
}

pub fn caller() {
    crate::task_spawn::spawn_blocking(|| 1);
}
"#;
    const QUERY: &str =
        "change runtime task_spawn spawn_blocking wasm inline native blocking pool";

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();
    let impact = db.impact_surface(QUERY, 20).unwrap();

    let caller_is_direct_impact = impact.iter().any(|entry| {
        entry.category == "Direct structural impact"
            && entry.reason == "direct_caller"
            && entry.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
    });
    assert!(caller_is_direct_impact, "spawn_blocking caller should be present: {impact:?}");

    let runtime_became_seed = impact.iter().any(|entry| {
        entry.reason == "exact_symbol_definition"
            && entry.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("runtime"))
    });
    assert!(
        !runtime_became_seed,
        "broad `runtime` token should not become an exact impact seed: {impact:?}"
    );

    let type_reference_leaked = impact.iter().any(|entry| {
        entry.evidence.iter().any(|evidence| evidence.contains("references_type"))
            || entry.symbol.as_deref() == Some("Send")
    });
    assert!(
        !type_reference_leaked,
        "type references should not appear as direct impact: {impact:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
5670
/// Checks the ordering of documentation hits for a selected symbol.
///
/// With a Rust source target and a Markdown docs target, the first hit for
/// `spawn_blocking` must be its own source file. If the unrelated
/// `docs/phrase-persistence.md` appears at all, `docs/task_spawn.md` must appear before it.
#[test]
fn docs_for_symbol_prefers_local_source_context_before_broad_markdown() {
    const TASK_SPAWN_RS: &str = r#"
pub fn spawn_blocking<F, T>(f: F) -> T
where
    F: FnOnce() -> T + Send + 'static,
    T: Send + 'static,
{
    f()
}
"#;
    const PHRASE_DOC: &str =
        "# Phrase persistence\nUnrelated notes mention spawn_blocking in passing.\n";
    const TASK_SPAWN_DOC: &str = "# task_spawn\nLocal task_spawn notes explain spawn_blocking.\n";

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src/runtime")).unwrap();
    fs::create_dir_all(root.join("docs")).unwrap();
    fs::write(root.join("src/runtime/task_spawn.rs"), TASK_SPAWN_RS).unwrap();
    fs::write(root.join("docs/phrase-persistence.md"), PHRASE_DOC).unwrap();
    fs::write(root.join("docs/task_spawn.md"), TASK_SPAWN_DOC).unwrap();

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["src/".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let markdown_target = ResolvedTarget {
        name: "markdown".to_string(),
        language: Language::Markdown,
        directories: vec![PathBuf::from("docs")],
        include: vec!["**/*.md".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Docs,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target, markdown_target],
        local_ai: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    let mut candidates = db.symbols("spawn_blocking", Some(Language::Rust), 10).unwrap();
    let symbol = candidates.remove(0);
    let hits = db.docs_for_selected_symbol(&symbol, 10).unwrap();
    assert_eq!(hits[0].path, "src/runtime/task_spawn.rs", "docs hits: {hits:?}");

    let phrase_index = hits.iter().position(|hit| hit.path == "docs/phrase-persistence.md");
    let task_spawn_index = hits.iter().position(|hit| hit.path == "docs/task_spawn.md");
    // Ordering only matters when the unrelated phrase document was returned at all.
    let local_docs_rank_first = match (phrase_index, task_spawn_index) {
        (None, _) => true,
        (Some(phrase), Some(local)) => local < phrase,
        (Some(_), None) => false,
    };
    assert!(
        local_docs_rank_first,
        "path-local task_spawn docs should outrank unrelated phrase docs: {hits:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
5736
/// Checks that a file ending in a syntax error still contributes its well-formed items.
///
/// The fixture ends with the unterminated `fn broken( {`. The `caller` symbol and the
/// `Syntactic` `calls_name` edge from `caller` to `helper` must still be indexed.
#[test]
fn partial_tree_sitter_trees_still_contribute_valid_symbols_and_edges() {
    const LIB_RS: &str = r#"
pub fn helper() {}

pub fn caller() {
    helper();
}

fn broken( {
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let source_dir = root.join("src");
    fs::create_dir_all(&source_dir).unwrap();
    fs::write(source_dir.join("lib.rs"), LIB_RS).unwrap();

    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let symbols = db.symbols("caller", Some(Language::Rust), 10).unwrap();
    let caller_is_indexed = symbols.iter().any(|candidate| candidate.name == "caller");
    assert!(caller_is_indexed, "caller symbols: {symbols:?}");
    assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(root).unwrap();
}
5767
/// Checks that `joinset.spawn_blocking()` is not bound to the free function
/// `spawn_blocking` defined in the same file.
///
/// The stored `calls_name` edge from `caller` must carry `joinset::spawn_blocking` as its
/// qualified target and `joinset` as the receiver hint, have no target symbol id, and be
/// stored as `NameOnly` / `unresolved`.
#[test]
fn receiver_method_calls_do_not_bind_to_same_named_free_functions() {
    const LIB_RS: &str = r#"
pub fn spawn_blocking() {}

pub fn caller(joinset: JoinSet) {
    joinset.spawn_blocking();
}

pub struct JoinSet;
"#;
    const RECEIVER_CALL_SQL: &str = "
            SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution, receiver_hint
            FROM edges
            WHERE from_name LIKE '%caller'
              AND edge_kind = 'calls_name'
              AND to_name = 'spawn_blocking'
            ";

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Columns, in order: to_name, target_qualified_name, to_symbol_id, confidence,
    // resolution, receiver_hint.
    let edge: (String, Option<String>, Option<i64>, String, String, Option<String>) = db
        .storage
        .connection()
        .query_row(RECEIVER_CALL_SQL, [], |row| {
            Ok((
                row.get(0)?,
                row.get(1)?,
                row.get(2)?,
                row.get(3)?,
                row.get(4)?,
                row.get(5)?,
            ))
        })
        .unwrap();
    assert_eq!(edge.0, "spawn_blocking");
    assert_eq!(edge.1.as_deref(), Some("joinset::spawn_blocking"));
    assert_eq!(edge.2, None);
    assert_eq!(edge.3, "NameOnly");
    assert_eq!(edge.4, "unresolved");
    assert_eq!(edge.5.as_deref(), Some("joinset"));

    fs::remove_dir_all(root).unwrap();
}
5822
/// Checks which edges `trace_callees` returns by default and which need an opt-in.
///
/// By default the verified call to `helper` is returned, while the external
/// `tokio::task::spawn_blocking` call, `references_type` edges and generic/type targets
/// are not. `include_references` brings back `references_type` edges and
/// `include_unresolved` brings back the external call.
#[test]
fn trace_callees_excludes_type_references_by_default() {
    use crate::query::graph::GraphTraversalOptions;

    const LIB_RS: &str = r#"
pub struct JoinError;
pub enum Result<T, E> { Ok(T), Err(E) }
pub fn helper() {}

pub fn spawn_blocking<F, T>(f: F) -> Result<T, JoinError>
where
    F: FnOnce() -> T + Send + 'static,
    T: Send + 'static,
{
    helper();
    tokio::task::spawn_blocking(f)
}
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let default_callees = db.trace_callees("spawn_blocking", 20).unwrap();
    let helper_is_verified = default_callees.iter().any(|candidate_edge| {
        candidate_edge.edge_kind == "calls_name"
            && candidate_edge.target.as_deref() == Some("helper")
            && candidate_edge.verified_target_symbol
    });
    assert!(helper_is_verified, "default callees: {default_callees:?}");

    let external_call_leaked = default_callees.iter().any(|candidate_edge| {
        candidate_edge.target_qualified_name.as_deref() == Some("tokio::task::spawn_blocking")
    });
    assert!(
        !external_call_leaked,
        "default callees leaked unresolved external call: {default_callees:?}"
    );

    let type_ref_leaked =
        default_callees.iter().any(|candidate_edge| candidate_edge.edge_kind == "references_type");
    assert!(!type_ref_leaked, "default callees leaked type refs: {default_callees:?}");

    let type_target_leaked = default_callees.iter().any(|candidate_edge| {
        matches!(
            candidate_edge.target.as_deref(),
            Some("F" | "T" | "Send" | "Result" | "JoinError")
        )
    });
    assert!(
        !type_target_leaked,
        "default callees leaked generic/type targets: {default_callees:?}"
    );

    let reference_options = GraphTraversalOptions {
        include_references: true,
        edge_kinds: None,
        ..Default::default()
    };
    let with_refs =
        db.trace_callees_with_options("spawn_blocking", 20, &reference_options).unwrap();
    let has_type_ref =
        with_refs.iter().any(|candidate_edge| candidate_edge.edge_kind == "references_type");
    assert!(has_type_ref, "reference-enabled callees: {with_refs:?}");

    let unresolved_options =
        GraphTraversalOptions { include_unresolved: true, ..Default::default() };
    let with_unresolved =
        db.trace_callees_with_options("spawn_blocking", 20, &unresolved_options).unwrap();
    let has_external_call = with_unresolved.iter().any(|candidate_edge| {
        candidate_edge.target_qualified_name.as_deref() == Some("tokio::task::spawn_blocking")
    });
    assert!(has_external_call, "unresolved-enabled callees: {with_unresolved:?}");

    fs::remove_dir_all(root).unwrap();
}
5913
/// Checks that default callee tracing keeps repo-local calls and drops low-signal ones.
///
/// `caller` mixes a call to `repo_helper` with `Vec::new`, `map_err`, `unwrap_or_else`,
/// `to_string` and `format!`. By default only `repo_helper` is required and none of the
/// others (nor any `uses_macro` edge) may appear. With unresolved edges, macros and
/// common methods enabled, a `uses_macro` edge and `unwrap_or_else` must appear.
#[test]
fn trace_callees_defaults_to_repo_relevant_calls() {
    use crate::query::graph::GraphTraversalOptions;

    const LIB_RS: &str = r#"
pub fn repo_helper() {}

pub fn caller(input: Result<String, String>) -> String {
    repo_helper();
    let values: Vec<String> = Vec::new();
    let _ = input.map_err(|error| error.to_string());
    let _ = Some("value").unwrap_or_else(|| "fallback");
    let _ = format!("hello");
    values.get(0).unwrap_or_else(|| "fallback").to_string()
}
"#;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), LIB_RS).unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let default_callees = db.trace_callees("caller", 20).unwrap();
    let keeps_repo_helper = default_callees
        .iter()
        .any(|candidate_edge| candidate_edge.target.as_deref() == Some("repo_helper"));
    assert!(
        keeps_repo_helper,
        "default callees should keep repo-local calls: {default_callees:?}"
    );
    let low_signal_leaked = default_callees.iter().any(|candidate_edge| {
        candidate_edge.edge_kind == "uses_macro"
            || matches!(
                candidate_edge.target.as_deref(),
                Some("new" | "map_err" | "unwrap_or_else" | "to_string" | "format")
            )
    });
    assert!(
        !low_signal_leaked,
        "default callees leaked low-signal calls: {default_callees:?}"
    );

    let expanded_options = GraphTraversalOptions {
        include_unresolved: true,
        include_macros: true,
        include_common_methods: true,
        ..Default::default()
    };
    let expanded = db.trace_callees_with_options("caller", 20, &expanded_options).unwrap();
    let has_macro_edge =
        expanded.iter().any(|candidate_edge| candidate_edge.edge_kind == "uses_macro");
    assert!(has_macro_edge, "macro-enabled callees: {expanded:?}");
    let has_common_method = expanded
        .iter()
        .any(|candidate_edge| candidate_edge.target.as_deref() == Some("unwrap_or_else"));
    assert!(has_common_method, "common-method-enabled callees: {expanded:?}");

    fs::remove_dir_all(root).unwrap();
}
5977
/// Indexes one Kotlin file and checks the call, `implements`, and import
/// edges stored for it, then confirms `helper` reports a direct caller in its
/// impact surface.
#[test]
fn indexes_kotlin_graph_edges_from_tree_sitter() {
    let kotlin_source = r#"
package dev.cq27.test

import dev.cq27.lib.ExternalThing

interface Syncable

class MainBridge : Syncable {
    suspend fun syncOnce() {
        helper()
        ExternalThing()
    }
}

fun helper() {}
"#;
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/Main.kt"), kotlin_source).unwrap();
    let cfg = source_config(root.clone(), Language::Kotlin);
    let db = IndexDatabase::rebuild(&cfg).unwrap();

    // (from, to, edge kind, confidence) rows expected in the edges table.
    for (from, to, edge_kind, confidence) in [
        ("syncOnce", "helper", "calls_name", "Syntactic"),
        ("MainBridge", "Syncable", "implements", "Syntactic"),
        ("src/Main.kt", "ExternalThing", "imports", "NameOnly"),
    ] {
        assert_edge(&db, from, to, edge_kind, confidence);
    }

    let impact = db.impact_surface("helper", 10).unwrap();
    let has_direct_caller = impact.iter().any(|item| {
        item.reason == "direct_caller" && item.category == "Direct structural impact"
    });
    assert!(has_direct_caller, "impact: {impact:?}");

    fs::remove_dir_all(root).unwrap();
}
6019
/// Indexes the `graph-realworld/kotlin` fixture and checks the expected
/// import, containment, call, and type-reference edges, plus the caller
/// lookup for `cleaned`.
#[test]
fn indexes_real_world_kotlin_graph_patterns() {
    let root = fixture_temp_root("graph-realworld/kotlin");
    let cfg = source_config(root.clone(), Language::Kotlin);
    let db = IndexDatabase::rebuild(&cfg).unwrap();

    // (from, to, edge kind, confidence) rows expected in the edges table.
    let expected_edges = [
        ("src/Main.kt", "ExternalFactory", "imports", "NameOnly"),
        ("Worker", "companion", "contains", "Exact"),
        ("companion", "create", "contains", "Exact"),
        ("syncOnce", "create", "calls_name", "Syntactic"),
        ("syncOnce", "Worker", "references_type", "Syntactic"),
        ("syncOnce", "run", "calls_name", "Syntactic"),
        ("syncOnce", "SingletonRunner", "references_type", "Syntactic"),
        ("syncOnce", "ExternalFactory", "calls_name", "NameOnly"),
        ("syncOnce", "ExternalFactory", "references_type", "NameOnly"),
        ("syncOnce", "cleaned", "calls_name", "Syntactic"),
    ];
    for (from, to, edge_kind, confidence) in expected_edges {
        assert_edge(&db, from, to, edge_kind, confidence);
    }

    let callers = db.find_callers("cleaned", 10).unwrap();
    let sync_once_calls_cleaned = callers.iter().any(|edge| {
        let from_sync_once =
            edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("syncOnce"));
        edge.edge_kind == "calls_name" && edge.edge_confidence == edge.confidence && from_sync_once
    });
    assert!(sync_once_calls_cleaned, "cleaned callers: {callers:?}");

    fs::remove_dir_all(root).unwrap();
}
6048
/// Two Kotlin types both define `build`; an exact caller lookup pinned to
/// `WatchProposalBuilder.build` must list `actualCaller` once and must not
/// list the function that only calls `AndroidDialogBuilder.build`.
#[test]
fn kotlin_caller_lookup_respects_qualified_receivers_for_common_method_names() {
    let kotlin_source = r#"
package dev.cq27.test

object WatchProposalBuilder {
    fun build(): String = "proposal"
}

class AndroidDialogBuilder {
    fun build(): String = "dialog"
}

fun actualCaller() {
    WatchProposalBuilder.build()
}

fun unrelatedBuilderCalls(dialog: AndroidDialogBuilder) {
    dialog.build()
    AndroidDialogBuilder().build()
}
"#;
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/Main.kt"), kotlin_source).unwrap();
    let cfg = source_config(root.clone(), Language::Kotlin);
    let db = IndexDatabase::rebuild(&cfg).unwrap();

    // Pick the `build` symbol that belongs to WatchProposalBuilder.
    let build_symbols = db.symbols("build", Some(Language::Kotlin), 10).unwrap();
    let target = build_symbols
        .into_iter()
        .find(|symbol| symbol.qualified_name.contains("WatchProposalBuilder"))
        .expect("WatchProposalBuilder.build symbol");

    let exact_for_target = crate::query::graph::GraphTraversalOptions {
        resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
        symbol_id: Some(target.symbol_id),
        ..Default::default()
    };
    let callers = db.find_callers_with_options("build", 20, &exact_for_target).unwrap();

    // True when the edge's source symbol name ends with `caller`.
    let called_from = |edge_source: Option<&str>, caller: &str| {
        edge_source.is_some_and(|name| name.ends_with(caller))
    };

    let actual_caller_hits = callers
        .iter()
        .filter(|edge| called_from(edge.from_symbol.as_deref(), "actualCaller"))
        .count();
    assert_eq!(actual_caller_hits, 1, "actual caller should be present once: {callers:?}");

    let unrelated_present = callers
        .iter()
        .any(|edge| called_from(edge.from_symbol.as_deref(), "unrelatedBuilderCalls"));
    assert!(
        !unrelated_present,
        "unrelated builder calls should not resolve to WatchProposalBuilder.build: {callers:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
6118
/// Syncs one GitHub ref found in a markdown doc, first offline and then
/// through the mock client, and checks the sync counters, issue search,
/// path refs, rationale search, and chunk papertrail that result.
#[test]
fn github_sync_caches_papertrail_and_rationale_without_query_time_crawling() {
    let doc_text = "# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n";
    let (root, config) = markdown_config(doc_text);
    let db = IndexDatabase::rebuild(&config).unwrap();
    let client = MockGitHubClient;

    // Offline pass: the ref is discovered but nothing is synced.
    let offline =
        github::sync_from_refs::<MockGitHubClient>(db.storage.connection(), &root, None, true)
            .unwrap();
    assert!(offline.offline);
    assert_eq!(offline.discovered_refs, 1);
    assert_eq!(offline.synced_items, 0);

    // Online pass through the mock client.
    let report =
        github::sync_from_refs(db.storage.connection(), &root, Some(&client), false).unwrap();
    assert!(!report.offline);
    assert_eq!(report.discovered_refs, 1);
    assert_eq!(report.synced_items, 5);
    let synced = &report.status;
    assert_eq!(synced.issues, 1);
    assert_eq!(synced.comments, 1);
    assert_eq!(synced.pulls, 1);
    assert_eq!(synced.reviews, 1);
    assert_eq!(synced.review_comments, 1);

    let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
    assert_eq!(issue_hits.len(), 1);
    let issue_hit = &issue_hits[0];
    assert_eq!(issue_hit.classification, "decision");
    assert_eq!(issue_hit.evidence_kind, "historical_github");

    let path_refs = db.github_refs_for_path("docs/search.md", 10).unwrap();
    assert_eq!(path_refs.len(), 1);
    assert_eq!(path_refs[0].source_kind, "file");

    let rationale = db.rationale_search("risk", 10).unwrap();
    assert!(rationale.iter().any(|item| item.classification == "risk"));

    // A literal issue reference should rank the structured ref first.
    let issue_ref_rationale = db.rationale_search("Fixes #42", 10).unwrap();
    let top_hit = issue_ref_rationale.first();
    assert_eq!(top_hit.map(|item| item.number), Some(42));
    assert_eq!(top_hit.map(|item| item.evidence_kind), Some("literal_github_ref"));
    assert_eq!(top_hit.map(|item| item.score), Some(1.0));
    assert!(
        issue_ref_rationale.iter().any(|item| item.number == 42),
        "issue ref rationale should use structured GitHub refs: {issue_ref_rationale:?}"
    );

    let chunk_id = first_chunk_id(&db);
    let papertrail = db.papertrail_for_chunk(chunk_id, 10).unwrap().unwrap();
    assert!(papertrail.current_source.is_some());
    assert!(!papertrail.github_evidence.is_empty());
    let only_known_kinds = papertrail.github_evidence.iter().all(|item| {
        matches!(item.evidence_kind, "historical_github" | "literal_github_ref")
    });
    assert!(only_known_kinds);

    fs::remove_dir_all(root).unwrap();
}
6177
/// Builds a one-commit git repo whose message says `Fixes #42`, syncs GitHub
/// data through the mock client, and checks that the commit papertrail leads
/// with the literal ref and carries no fallback evidence.
#[test]
fn papertrail_for_commit_prefers_commit_sourced_github_refs() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("docs")).unwrap();

    // Repository setup: identity, one doc, one commit referencing issue 42.
    run_git(&root, &["init"]);
    run_git(&root, &["config", "user.name", "Rag Rat"]);
    run_git(&root, &["config", "user.email", "rag@example.com"]);
    fs::write(root.join("docs/search.md"), "# Decision\nalpha\n").unwrap();
    run_git(&root, &["add", "."]);
    run_git(&root, &["commit", "-m", "Fix search rationale", "-m", "Fixes #42"]);

    let cfg = markdown_config_for_root(root.clone());
    let db = IndexDatabase::rebuild(&cfg).unwrap();
    let commit_hash = db
        .storage
        .connection()
        .query_row("SELECT hash FROM git_commits LIMIT 1", [], |row| row.get::<_, String>(0))
        .unwrap();
    let client = MockGitHubClient;
    github::sync_from_refs(db.storage.connection(), &root, Some(&client), false).unwrap();

    // Look the commit up by its 7-character prefix.
    let short_hash = &commit_hash[..7];
    let papertrail = db.papertrail_for_commit(short_hash, 10).unwrap();
    let leading = papertrail.github_evidence.first();
    assert_eq!(leading.map(|item| item.number), Some(42));
    assert_eq!(leading.map(|item| item.evidence_kind), Some("literal_github_ref"));
    assert!(
        papertrail.fallback_github_evidence.is_empty(),
        "structured commit refs should suppress noisy fallback evidence: {papertrail:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
6213
/// A file that mentions `#42` twice must produce exactly one issue evidence
/// row for issue 42 in the symbol's papertrail.
#[test]
fn papertrail_for_symbol_dedupes_duplicate_file_refs() {
    let lib_source =
        "// First rationale (#42)\n// Second rationale (#42)\npub fn tracked_symbol() {}\n";
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();
    let cfg = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&cfg).unwrap();
    let client = MockGitHubClient;
    github::sync_from_refs(db.storage.connection(), &root, Some(&client), false).unwrap();

    let papertrail = db
        .papertrail_for_symbol("tracked_symbol", Some(Language::Rust), 10)
        .unwrap()
        .expect("tracked symbol papertrail");

    let issue_42_rows = papertrail
        .github_evidence
        .iter()
        .filter(|item| item.item_kind == "issue" && item.number == 42)
        .count();
    assert_eq!(
        issue_42_rows, 1,
        "duplicate #42 refs in one file should collapse to one issue evidence row: {papertrail:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
6245
/// With one ref that syncs and one that fails, the first sync must keep the
/// successful items and report the failure; a second sync must skip both
/// refs without syncing or failing anything.
#[test]
fn github_sync_keeps_partial_cache_and_skips_synced_refs_after_404() {
    let doc_text =
        "# Decision\nRefs cq27-dev/rag-rat#42 and cq27-dev/rag-rat#404\nwe will keep sqlite\n";
    let (root, config) = markdown_config(doc_text);
    let db = IndexDatabase::rebuild(&config).unwrap();
    let client = PartiallyFailingGitHubClient;

    // First pass: #42 syncs, #404 is recorded as a failure.
    let first =
        github::sync_from_refs(db.storage.connection(), &root, Some(&client), false).unwrap();
    assert_eq!(first.discovered_refs, 2);
    assert_eq!(first.synced_items, 5);
    assert_eq!(first.failed_refs, 1);
    assert_eq!(first.errors.len(), 1);
    let failure = &first.errors[0];
    assert_eq!(failure.number, 404);
    assert_eq!(failure.status, "not_found");

    let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
    assert_eq!(issue_hits.len(), 1);
    assert_eq!(issue_hits[0].number, 42);

    // Second pass: both refs are skipped.
    let second =
        github::sync_from_refs(db.storage.connection(), &root, Some(&client), false).unwrap();
    assert_eq!(second.synced_items, 0);
    assert_eq!(second.skipped_refs, 2);
    assert_eq!(second.failed_refs, 0);

    fs::remove_dir_all(root).unwrap();
}
6275
/// After the FTS index is marked dirty, a search must still return the hit
/// and leave the status reporting a clean, fresh FTS index.
#[test]
fn search_recovers_when_fts_is_marked_dirty() {
    let (root, config) = markdown_config("alpha token");
    let db = IndexDatabase::rebuild(&config).unwrap();
    db.mark_fts_dirty().unwrap();

    // Status before the search reflects the dirty flag.
    let before = db.status(&config.database).unwrap();
    assert!(before.fts_dirty);
    assert!(!before.fts_fresh);

    let hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].summary, "alpha token");

    // Status after the search is clean again.
    let after = db.status(&config.database).unwrap();
    assert!(!after.fts_dirty);
    assert!(after.fts_fresh);

    fs::remove_dir_all(root).unwrap();
}
6295
/// Inserting one line above an indexed chunk must make `read_chunk` return
/// the same text at the shifted line span.
#[test]
fn read_chunk_relocates_small_line_drift_to_current_text() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();
    let chunk_id = first_chunk_id(&db);

    // Push the indexed content down by one line after indexing.
    let doc_path = root.join("docs/search.md");
    fs::write(doc_path, "inserted\n# Title\nalpha token\n").unwrap();

    let relocated = db.read_chunk(chunk_id).unwrap().unwrap();
    assert_eq!(relocated.start_line, 2);
    assert_eq!(relocated.end_line, 3);
    assert_eq!(relocated.text, "# Title\nalpha token\n");

    fs::remove_dir_all(root).unwrap();
}
6310
/// Replacing the file's content must make `read_chunk` fail with a
/// `StaleChunk` error, after which search finds the new text and no longer
/// finds the old one.
#[test]
fn read_chunk_large_drift_reindexes_and_reports_stale_chunk() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();
    let chunk_id = first_chunk_id(&db);

    // Rewrite the document so the indexed chunk text no longer exists.
    let doc_path = root.join("docs/search.md");
    fs::write(doc_path, "# Replacement\nbeta token\n").unwrap();

    let err = db.read_chunk(chunk_id).unwrap_err().to_string();
    assert!(err.contains("StaleChunk"), "{err}");

    let beta_hits = db.search("beta", 10, false).unwrap();
    assert_eq!(beta_hits.len(), 1);
    let alpha_hits = db.search("alpha", 10, false).unwrap();
    assert!(alpha_hits.is_empty());

    fs::remove_dir_all(root).unwrap();
}
6326
/// Once the file's body changes from "alpha" to "beta", searching for the
/// old term must return nothing and searching for the new term must return
/// the updated chunk.
#[test]
fn search_retries_after_healing_stale_hit() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Change the body after indexing so the stored hit is stale.
    let doc_path = root.join("docs/search.md");
    fs::write(doc_path, "# Title\nbeta token\n").unwrap();

    let alpha_hits = db.search("alpha", 10, false).unwrap();
    assert!(alpha_hits.is_empty());

    let beta_hits = db.search("beta", 10, false).unwrap();
    assert_eq!(beta_hits.len(), 1);
    assert!(beta_hits[0].summary.contains("beta"));

    fs::remove_dir_all(root).unwrap();
}
6341
/// Inserting one line above the indexed text must make search return the
/// hit at the shifted line span.
#[test]
fn search_heals_relocated_hits_before_returning_line_spans() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Push the indexed content down by one line after indexing.
    let doc_path = root.join("docs/search.md");
    fs::write(doc_path, "inserted\n# Title\nalpha token\n").unwrap();

    let hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(hits.len(), 1);
    let relocated = &hits[0];
    assert_eq!(relocated.start_line, 2);
    assert_eq!(relocated.end_line, 3);
    assert!(relocated.summary.contains("alpha"));

    fs::remove_dir_all(root).unwrap();
}
6356
/// Deleting the source file must make `read_chunk` fail with a `Gone` error
/// and make search return no hits for the old text.
#[test]
fn read_chunk_deleted_source_reports_gone() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();
    let chunk_id = first_chunk_id(&db);

    // Remove the indexed file from disk.
    let doc_path = root.join("docs/search.md");
    fs::remove_file(doc_path).unwrap();

    let err = db.read_chunk(chunk_id).unwrap_err().to_string();
    assert!(err.contains("Gone"), "{err}");
    let alpha_hits = db.search("alpha", 10, false).unwrap();
    assert!(alpha_hits.is_empty());

    fs::remove_dir_all(root).unwrap();
}
6370
/// Rewrites one more file than `MAX_AUTO_HEAL_FILES_PER_CALL` after indexing
/// and expects search to fail with a `needs_reindex` error.
#[test]
fn search_returns_needs_reindex_when_heal_cap_is_exceeded() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let docs = root.join("docs");
    fs::create_dir_all(&docs).unwrap();

    // Writes `text` to doc-0.md ..= doc-MAX.md, i.e. one file past the cap.
    let write_all_docs = |text: &str| {
        for index in 0..=MAX_AUTO_HEAL_FILES_PER_CALL {
            fs::write(docs.join(format!("doc-{index}.md")), text).unwrap();
        }
    };

    write_all_docs("common stale token\n");
    let cfg = markdown_config_for_root(root.clone());
    let db = IndexDatabase::rebuild(&cfg).unwrap();
    write_all_docs("fresh replacement token\n");

    let err = db.search("common", 20, false).unwrap_err().to_string();
    assert!(err.contains("needs_reindex"), "{err}");

    fs::remove_dir_all(root).unwrap();
}
6391
/// Calling `heal_index` with a limit of one on two unchanged files must
/// report nothing healed or removed, two files skipped, and no message.
#[test]
fn heal_index_limit_does_not_warn_when_only_fresh_files_are_skipped() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let docs = root.join("docs");
    fs::create_dir_all(&docs).unwrap();
    for (file_name, text) in [("one.md", "one fresh token\n"), ("two.md", "two fresh token\n")] {
        fs::write(docs.join(file_name), text).unwrap();
    }
    let cfg = markdown_config_for_root(root.clone());
    let db = IndexDatabase::rebuild(&cfg).unwrap();

    // Nothing changed on disk since the rebuild.
    let heal_report = db.heal_index(Some(1)).unwrap();

    assert_eq!(heal_report.healed_files, 0);
    assert_eq!(heal_report.removed_files, 0);
    assert_eq!(heal_report.skipped_files, 2);
    assert_eq!(heal_report.message, None);

    fs::remove_dir_all(root).unwrap();
}
6412
/// Overwriting the `fts_source_revision` meta value must make the status
/// report a non-fresh FTS index; a search must then return the hit and
/// leave the FTS revision equal to the content revision.
#[test]
fn search_recovers_when_fts_revision_is_stale() {
    let (root, config) = markdown_config("alpha token");
    let db = IndexDatabase::rebuild(&config).unwrap();
    db.set_meta("fts_source_revision", "stale").unwrap();

    // Not dirty, but no longer fresh.
    let before = db.status(&config.database).unwrap();
    assert!(!before.fts_dirty);
    assert!(!before.fts_fresh);

    let hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(hits.len(), 1);

    let after = db.status(&config.database).unwrap();
    let content_revision = after.content_revision.as_str();
    assert_eq!(after.fts_source_revision.as_deref(), Some(content_revision));
    assert!(after.fts_fresh);

    fs::remove_dir_all(root).unwrap();
}
6431
/// Indexing a Rust file with an unterminated signature must report one
/// parser failure, with `src/broken.rs` as the first failure path.
#[test]
fn parser_failures_report_paths() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).unwrap();
    fs::write(src_dir.join("broken.rs"), "pub fn broken(").unwrap();

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.rs".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target],
        local_ai: Default::default(),
    };

    let db = IndexDatabase::rebuild(&config).unwrap();
    let index_status = db.status(&config.database).unwrap();
    assert_eq!(index_status.parser_failures, 1);
    assert_eq!(index_status.parser_failure_paths[0].path, "src/broken.rs");

    fs::remove_dir_all(root).unwrap();
}
6460
/// A memory bound to a logical symbol must be returned for that symbol,
/// show up on the chunk referenced by its binding, and count as one direct,
/// active memory in the impact report.
#[test]
fn repo_memory_bound_to_logical_symbol_surfaces_in_symbol_chunk_and_impact() {
    // Two cfg-gated definitions share the name `cfg_helper`.
    let lib_source =
        "#[cfg(unix)]\npub fn cfg_helper() {}\n#[cfg(windows)]\npub fn cfg_helper() {}\n";
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();
    let cfg = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&cfg).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("cfg_helper".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: true,
        limit: 10,
    };
    let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("selected symbol");
    let logical_symbol_id = symbol.logical_symbol_id.expect("logical symbol id");

    // Bind the memory to the logical symbol only; every other anchor is unset.
    let bind_to_logical_symbol = crate::query::memory::RepoMemoryBindTarget {
        logical_symbol_id: Some(logical_symbol_id),
        symbol_id: None,
        chunk_id: None,
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let created = db
        .memory_create(crate::query::memory::RepoMemoryCreate {
            kind: "Invariant".to_string(),
            title: "Treat cfg helper variants as one logical helper".to_string(),
            body: "Caller and impact analysis should use the logical symbol, not one cfg body variant."
                .to_string(),
            confidence: "high".to_string(),
            created_by: Some("test-agent".to_string()),
            source: Some("agent".to_string()),
            tags: vec!["cfg".to_string(), "graph".to_string()],
            bind: bind_to_logical_symbol,
        })
        .unwrap();
    assert!(!created.duplicate);
    assert_eq!(created.memory.bindings[0].binding_kind, "logical_symbol");

    // Symbol lookup returns the memory.
    let memories = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(memories.len(), 1);
    let symbol_memory = &memories[0];
    assert_eq!(symbol_memory.kind, "Invariant");

    // The chunk referenced by the binding carries the same memory.
    let bound_chunk_id = symbol_memory.bindings[0].chunk_id.expect("bound chunk");
    let bound_chunk = db.read_chunk(bound_chunk_id).unwrap().expect("memory chunk");
    assert_eq!(bound_chunk.memories.len(), 1);
    assert_eq!(bound_chunk.memories[0].memory_id, created.memory.memory_id);

    // The impact report counts it as direct and active.
    let impact_options = crate::query::impact::ImpactSurfaceOptions::default();
    let impact =
        db.impact_surface_report_for_selected_symbol(&symbol, 10, &impact_options).unwrap();
    assert_eq!(impact.repo_memories.direct.len(), 1);
    let memory_status = &impact.completeness_and_caveats.memory_status;
    assert_eq!(memory_status.active, 1);
    assert_eq!(memory_status.stale, 0);

    fs::remove_dir_all(root).unwrap();
}
6541
/// A chunk-bound memory must be reported `stale` after the chunk's stored
/// text hash is changed, and `gone` after the chunk row is deleted.
#[test]
fn repo_memory_validate_marks_changed_or_missing_anchors_non_current() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "pub fn anchored_memory() {}\n").unwrap();
    let cfg = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&cfg).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("anchored_memory".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: false,
        limit: 10,
    };
    let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("selected symbol");

    // Find the chunk that holds the selected symbol.
    let chunk_id = db
        .storage
        .connection()
        .query_row(
            "
            SELECT chunks.id
            FROM chunks
            JOIN files ON files.id = chunks.file_id
            WHERE files.path = ?1 AND chunks.symbol_path = ?2
            LIMIT 1
            ",
            params![symbol.path, symbol.qualified_name],
            |row| row.get::<_, i64>(0),
        )
        .unwrap();

    // Bind the memory to that chunk only; every other anchor is unset.
    let bind_to_chunk = crate::query::memory::RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: Some(chunk_id),
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let created = db
        .memory_create(crate::query::memory::RepoMemoryCreate {
            kind: "Risk".to_string(),
            title: "Anchor must become stale when source hash changes".to_string(),
            body: "Validation should separate stale memories from current repo evidence."
                .to_string(),
            confidence: "medium".to_string(),
            created_by: Some("test-agent".to_string()),
            source: Some("agent".to_string()),
            tags: Vec::new(),
            bind: bind_to_chunk,
        })
        .unwrap();

    // Step 1: change the stored hash, so validation reports the anchor stale.
    db.storage
        .connection()
        .execute("UPDATE chunks SET text_hash = 'changed' WHERE id = ?1", [chunk_id])
        .unwrap();
    let after_hash_change = db.memory_validate().unwrap();
    assert_eq!(after_hash_change.stale, 1);
    let stale = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(stale[0].memory_id, created.memory.memory_id);
    assert_eq!(stale[0].bindings[0].anchor_status, "stale");

    // Step 2: delete the chunk row, so validation reports the anchor gone.
    db.storage.connection().execute("DELETE FROM chunks WHERE id = ?1", [chunk_id]).unwrap();
    let after_delete = db.memory_validate().unwrap();
    assert_eq!(after_delete.gone, 1);
    let gone = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(gone[0].bindings[0].anchor_status, "gone");

    fs::remove_dir_all(root).unwrap();
}
6626
/// A memory bound to the caller edge of `target_edge` must appear under
/// `path_crossed` (not `direct`) in the target's impact report, and a memory
/// bound to a call path must be retrievable by its edge sequence hash.
#[test]
fn repo_memory_bound_to_edge_surfaces_when_impact_crosses_call_path() {
    let lib_source = "pub fn target_edge() {}\npub fn caller_edge() {\n target_edge();\n}\n";
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), lib_source).unwrap();
    let cfg = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&cfg).unwrap();

    let selector = crate::query::symbol::SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("target_edge".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: false,
        limit: 10,
    };
    let target = db.select_symbol(&selector).unwrap().unwrap().expect("selected target");

    // Take the id of the first reported caller edge of the target.
    let graph_options = crate::query::graph::GraphTraversalOptions {
        resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
        symbol_id: Some(target.symbol_id),
        logical_symbol_id: target.logical_symbol_id,
        ..Default::default()
    };
    let callers =
        db.graph_traversal_report("find_callers", &target, true, 10, &graph_options).unwrap();
    let edge_id = callers.results[0].edge_id;

    // Memory bound to that edge only.
    let bind_to_edge = crate::query::memory::RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: None,
        edge_id: Some(edge_id),
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let edge_memory = db
        .memory_create(crate::query::memory::RepoMemoryCreate {
            kind: "Risk".to_string(),
            title: "caller_edge to target_edge must stay synchronous".to_string(),
            body: "This specific call path is used to prove edge-bound memories surface when impact crosses the edge."
                .to_string(),
            confidence: "high".to_string(),
            created_by: Some("test-agent".to_string()),
            source: Some("agent".to_string()),
            tags: vec!["edge".to_string()],
            bind: bind_to_edge,
        })
        .unwrap();
    let edge_binding = &edge_memory.memory.bindings[0];
    assert_eq!(edge_binding.binding_kind, "edge");
    assert_eq!(edge_binding.edge_id, Some(edge_id));

    // The impact report lists the edge memory as path-crossed, not direct.
    let impact_options = crate::query::impact::ImpactSurfaceOptions {
        resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
        ..Default::default()
    };
    let impact =
        db.impact_surface_report_for_selected_symbol(&target, 10, &impact_options).unwrap();
    assert!(impact.repo_memories.direct.is_empty());
    assert_eq!(impact.repo_memories.path_crossed.len(), 1);
    assert_eq!(impact.repo_memories.path_crossed[0].memory_id, edge_memory.memory.memory_id);
    assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);

    // Memory bound to a call path, addressed by its edge sequence hash.
    let bind_to_call_path = crate::query::memory::RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: None,
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: target.logical_symbol_id,
        end_logical_symbol_id: target.logical_symbol_id,
        edge_sequence_hash: Some("edge-sequence-test-hash".to_string()),
        path_summary: Some("caller_edge -> target_edge".to_string()),
    };
    let call_path_memory = db
        .memory_create(crate::query::memory::RepoMemoryCreate {
            kind: "TestExpectation".to_string(),
            title: "caller_edge path hash recall".to_string(),
            body: "Call-path memories are addressable by a deterministic edge sequence hash."
                .to_string(),
            confidence: "medium".to_string(),
            created_by: Some("test-agent".to_string()),
            source: Some("agent".to_string()),
            tags: vec!["call-path".to_string()],
            bind: bind_to_call_path,
        })
        .unwrap();
    let call_path = db.memory_for_call_path_hash("edge-sequence-test-hash", 10).unwrap();
    assert_eq!(call_path.len(), 1);
    let recalled = &call_path[0];
    assert_eq!(recalled.memory_id, call_path_memory.memory.memory_id);
    assert_eq!(recalled.call_paths[0].path_summary, "caller_edge -> target_edge");

    fs::remove_dir_all(root).unwrap();
}
6745
/// Commits a stable module once and a large module four times, then checks
/// that both the churn and the god-module briefs rank `src/hot.rs` first
/// with the expected metrics and hints.
#[test]
fn repo_brief_ranks_churn_and_god_module_candidates() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    run_git(&root, &["init"]);
    run_git(&root, &["config", "user.name", "Rag Rat"]);
    run_git(&root, &["config", "user.email", "rag@example.com"]);

    // Initial commit with both modules.
    fs::write(root.join("src/stable.rs"), "pub fn stable() -> i32 { 1 }\n").unwrap();
    fs::write(root.join("src/hot.rs"), hot_module_text(0)).unwrap();
    run_git(&root, &["add", "."]);
    run_git(&root, &["commit", "-m", "Add initial modules"]);

    // Three further commits that touch only the hot module.
    for revision in 1..=3 {
        fs::write(root.join("src/hot.rs"), hot_module_text(revision)).unwrap();
        run_git(&root, &["add", "src/hot.rs"]);
        run_git(&root, &["commit", "-m", "Iterate hot module"]);
    }

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.rs".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target],
        local_ai: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Both briefs ask for the single top candidate with the same flags.
    let top_one = |mode: crate::query::repo_brief::RepoBriefMode| {
        crate::query::repo_brief::RepoBriefOptions {
            mode,
            limit: 1,
            include_generated: false,
            include_memories: true,
        }
    };

    let churn = db.repo_brief(top_one(crate::query::repo_brief::RepoBriefMode::Churn)).unwrap();
    let churn_top = &churn.candidates[0];
    assert_eq!(churn_top.path, "src/hot.rs");
    assert_eq!(churn_top.category, "recent_churn_hotspot");
    assert!(churn_top.score <= 1.0);
    assert!(churn_top.metrics.commit_touch_count >= 4);
    assert!(churn_top.why.iter().any(|reason| reason.contains("churn")));

    let god_modules =
        db.repo_brief(top_one(crate::query::repo_brief::RepoBriefMode::GodModules)).unwrap();
    let god_top = &god_modules.candidates[0];
    assert_eq!(god_top.path, "src/hot.rs");
    assert!(god_top.score <= 1.0);
    assert!(god_top.metrics.symbol_count >= 30);
    assert!(!god_top.split_hints.is_empty());
    assert!(god_top.next_tools.iter().any(|tool| tool.tool == "impact_surface"));

    fs::remove_dir_all(root).unwrap();
}
6813
/// Generates Rust source for a module with an `entry` function that sums 32
/// helper calls plus `revision`, followed by the 32 `helper_N` definitions.
/// Only the `revision` literal differs between revisions.
fn hot_module_text(revision: usize) -> String {
    // One " helper_N() +" line per helper inside `entry`.
    let call_lines: String = (0..32).map(|i| format!(" helper_{i}() +\n")).collect();
    // One single-line definition per helper, each returning its own index.
    let helper_defs: String =
        (0..32).map(|i| format!("pub fn helper_{i}() -> i32 {{ {i} }}\n")).collect();
    format!("pub fn entry() -> i32 {{\n{call_lines} {revision}\n}}\n{helper_defs}")
}
6826
6827 fn unique_temp_root() -> PathBuf {
6828 let mut root = std::env::temp_dir();
6829 let suffix = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
6830 root.push(format!("rag-rat-schema-test-{}-{}-{suffix}", std::process::id(), now_ms()));
6831 root
6832 }
6833
6834 fn fixture_temp_root(fixture: &str) -> PathBuf {
6835 let root = unique_temp_root();
6836 let _ = fs::remove_dir_all(&root);
6837 let fixture_root =
6838 PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/fixtures").join(fixture);
6839 copy_fixture_dir(&fixture_root, &root);
6840 root
6841 }
6842
/// Recursively copies the directory tree at `from` into `to`, creating `to`
/// (and any missing parents) first.
///
/// # Panics
///
/// Panics when a directory cannot be created or read, or when a file cannot
/// be copied. Each panic message names the path involved, so a broken or
/// missing fixture can be identified from the test output.
fn copy_fixture_dir(from: &Path, to: &Path) {
    fs::create_dir_all(to).unwrap_or_else(|error| {
        panic!("failed to create fixture target directory {}: {error}", to.display())
    });
    let entries = fs::read_dir(from).unwrap_or_else(|error| {
        panic!("failed to read fixture directory {}: {error}", from.display())
    });
    for entry in entries {
        let entry = entry.unwrap_or_else(|error| {
            panic!("failed to list entry in fixture directory {}: {error}", from.display())
        });
        let from_path = entry.path();
        let to_path = to.join(entry.file_name());
        if from_path.is_dir() {
            // Descend so nested fixture directories are mirrored as well.
            copy_fixture_dir(&from_path, &to_path);
        } else {
            fs::copy(&from_path, &to_path).unwrap_or_else(|error| {
                panic!(
                    "failed to copy fixture file {} to {}: {error}",
                    from_path.display(),
                    to_path.display()
                )
            });
        }
    }
}
6856
6857 fn markdown_config(text: &str) -> (PathBuf, Config) {
6858 let root = unique_temp_root();
6859 let _ = fs::remove_dir_all(&root);
6860 let docs = root.join("docs");
6861 fs::create_dir_all(&docs).unwrap();
6862 fs::write(docs.join("search.md"), text).unwrap();
6863 let config = markdown_config_for_root(root.clone());
6864 (root, config)
6865 }
6866
6867 fn markdown_config_for_root(root: PathBuf) -> Config {
6868 Config {
6869 root: root.clone(),
6870 database: root.join(".rag-rat/index.sqlite"),
6871 targets: vec![ResolvedTarget {
6872 name: "markdown".to_string(),
6873 language: Language::Markdown,
6874 directories: vec![PathBuf::from("docs")],
6875 include: vec!["**/*.md".to_string()],
6876 exclude: Vec::new(),
6877 kind: TargetKind::Docs,
6878 }],
6879 local_ai: Default::default(),
6880 }
6881 }
6882
6883 fn source_config(root: PathBuf, language: Language) -> Config {
6884 Config {
6885 root: root.clone(),
6886 database: root.join(".rag-rat/index.sqlite"),
6887 targets: vec![ResolvedTarget {
6888 name: language.as_str().to_string(),
6889 language,
6890 directories: vec![PathBuf::from("src")],
6891 include: vec!["src/".to_string()],
6892 exclude: Vec::new(),
6893 kind: TargetKind::Source,
6894 }],
6895 local_ai: Default::default(),
6896 }
6897 }
6898
6899 fn assert_edge(db: &IndexDatabase, from: &str, to: &str, edge_kind: &str, confidence: &str) {
6900 let count = db
6901 .storage
6902 .connection()
6903 .query_row(
6904 "
6905 SELECT COUNT(*)
6906 FROM edges
6907 WHERE edge_kind = ?1
6908 AND confidence = ?2
6909 AND COALESCE(from_name, '') LIKE ?3
6910 AND to_name LIKE ?4
6911 ",
6912 params![edge_kind, confidence, format!("%{from}%"), format!("%{to}%")],
6913 |row| row.get::<_, i64>(0),
6914 )
6915 .unwrap();
6916 assert!(count > 0, "missing edge {from} -[{edge_kind}/{confidence}]-> {to}");
6917 }
6918
6919 fn table_count(db: &IndexDatabase, table: &str) -> i64 {
6920 db.storage
6921 .connection()
6922 .query_row("SELECT COUNT(*) FROM sqlite_master WHERE name = ?1", [table], |row| {
6923 row.get(0)
6924 })
6925 .unwrap()
6926 }
6927
6928 fn row_count(db: &IndexDatabase, table: &str) -> i64 {
6929 db.storage
6930 .connection()
6931 .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| row.get(0))
6932 .unwrap()
6933 }
6934
6935 fn chunk_columns(db: &IndexDatabase) -> Vec<String> {
6936 table_columns(db, "chunks")
6937 }
6938
6939 fn file_columns(db: &IndexDatabase) -> Vec<String> {
6940 table_columns(db, "files")
6941 }
6942
6943 fn table_columns(db: &IndexDatabase, table: &str) -> Vec<String> {
6944 let mut stmt =
6945 db.storage.connection().prepare(&format!("PRAGMA table_info({table})")).unwrap();
6946 stmt.query_map([], |row| row.get::<_, String>(1)).unwrap().map(Result::unwrap).collect()
6947 }
6948
6949 fn indexed_revision_count(db: &IndexDatabase) -> i64 {
6950 db.storage
6951 .connection()
6952 .query_row("SELECT COUNT(*) FROM files WHERE indexed_revision != ''", [], |row| {
6953 row.get(0)
6954 })
6955 .unwrap()
6956 }
6957
6958 fn chunk_source_revision_count(db: &IndexDatabase) -> i64 {
6959 db.storage
6960 .connection()
6961 .query_row("SELECT COUNT(*) FROM chunks WHERE source_revision != ''", [], |row| {
6962 row.get(0)
6963 })
6964 .unwrap()
6965 }
6966
6967 fn first_chunk_id(db: &IndexDatabase) -> i64 {
6968 db.storage
6969 .connection()
6970 .query_row("SELECT id FROM chunks ORDER BY id LIMIT 1", [], |row| row.get(0))
6971 .unwrap()
6972 }
6973
/// Runs `git` with `args` in the directory `root` and panics unless it succeeds.
///
/// On a non-zero exit status the panic message includes the arguments and the
/// captured stdout and stderr. Also panics if the process cannot be spawned.
fn run_git(root: &Path, args: &[&str]) {
    let mut command = Command::new("git");
    command.args(args).current_dir(root);
    let output = command.output().unwrap();

    if output.status.success() {
        return;
    }
    panic!(
        "git {:?} failed\nstdout:\n{}\nstderr:\n{}",
        args,
        String::from_utf8_lossy(&output.stdout),
        String::from_utf8_lossy(&output.stderr)
    );
}
6984
6985 struct MockGitHubClient;
6986
6987 impl github::GitHubClient for MockGitHubClient {
6988 fn issue(
6989 &self,
6990 owner: &str,
6991 repo: &str,
6992 number: i64,
6993 ) -> anyhow::Result<github::GitHubIssue> {
6994 Ok(github::GitHubIssue {
6995 owner: owner.to_string(),
6996 repo: repo.to_string(),
6997 number,
6998 html_url: format!("https://github.com/{owner}/{repo}/issues/{number}"),
6999 state: "open".to_string(),
7000 title: "Decision: keep sqlite".to_string(),
7001 body: "We decided sqlite is required for binary size.".to_string(),
7002 author: Some("octo".to_string()),
7003 created_at: Some("2026-01-01T00:00:00Z".to_string()),
7004 updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7005 is_pull_request: true,
7006 })
7007 }
7008
7009 fn issue_comments(
7010 &self,
7011 owner: &str,
7012 repo: &str,
7013 number: i64,
7014 ) -> anyhow::Result<Vec<github::GitHubComment>> {
7015 Ok(vec![github::GitHubComment {
7016 id: 4201,
7017 owner: owner.to_string(),
7018 repo: repo.to_string(),
7019 number,
7020 html_url: format!("https://github.com/{owner}/{repo}/issues/{number}#comment-1"),
7021 body: "Rejected alternative: duckdb was too large.".to_string(),
7022 author: Some("octo".to_string()),
7023 created_at: Some("2026-01-01T01:00:00Z".to_string()),
7024 updated_at: Some("2026-01-01T01:00:00Z".to_string()),
7025 }])
7026 }
7027
7028 fn pull(
7029 &self,
7030 owner: &str,
7031 repo: &str,
7032 number: i64,
7033 ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7034 Ok(Some(github::GitHubPullRequest {
7035 owner: owner.to_string(),
7036 repo: repo.to_string(),
7037 number,
7038 html_url: format!("https://github.com/{owner}/{repo}/pull/{number}"),
7039 state: "open".to_string(),
7040 title: "Use sqlite".to_string(),
7041 body: "Constraint: normal queries must use cache only.".to_string(),
7042 author: Some("octo".to_string()),
7043 created_at: Some("2026-01-01T00:00:00Z".to_string()),
7044 updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7045 merged_at: None,
7046 }))
7047 }
7048
7049 fn pull_reviews(
7050 &self,
7051 owner: &str,
7052 repo: &str,
7053 number: i64,
7054 ) -> anyhow::Result<Vec<github::GitHubReview>> {
7055 Ok(vec![github::GitHubReview {
7056 id: 4202,
7057 owner: owner.to_string(),
7058 repo: repo.to_string(),
7059 number,
7060 html_url: Some(format!("https://github.com/{owner}/{repo}/pull/{number}#review")),
7061 state: "COMMENTED".to_string(),
7062 body: "Risk: live crawling during search would be surprising.".to_string(),
7063 author: Some("reviewer".to_string()),
7064 submitted_at: Some("2026-01-01T02:00:00Z".to_string()),
7065 }])
7066 }
7067
7068 fn pull_review_comments(
7069 &self,
7070 owner: &str,
7071 repo: &str,
7072 number: i64,
7073 ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7074 Ok(vec![github::GitHubReviewComment {
7075 id: 4203,
7076 owner: owner.to_string(),
7077 repo: repo.to_string(),
7078 number,
7079 path: Some("docs/search.md".to_string()),
7080 html_url: format!("https://github.com/{owner}/{repo}/pull/{number}#discussion"),
7081 body: "No longer use obsolete duckdb rationale.".to_string(),
7082 author: Some("reviewer".to_string()),
7083 created_at: Some("2026-01-01T03:00:00Z".to_string()),
7084 updated_at: Some("2026-01-01T03:00:00Z".to_string()),
7085 }])
7086 }
7087 }
7088
7089 struct PartiallyFailingGitHubClient;
7090
7091 impl github::GitHubClient for PartiallyFailingGitHubClient {
7092 fn issue(
7093 &self,
7094 owner: &str,
7095 repo: &str,
7096 number: i64,
7097 ) -> anyhow::Result<github::GitHubIssue> {
7098 if number == 404 {
7099 anyhow::bail!("gh: Not Found (HTTP 404)");
7100 }
7101 MockGitHubClient.issue(owner, repo, number)
7102 }
7103
7104 fn issue_comments(
7105 &self,
7106 owner: &str,
7107 repo: &str,
7108 number: i64,
7109 ) -> anyhow::Result<Vec<github::GitHubComment>> {
7110 MockGitHubClient.issue_comments(owner, repo, number)
7111 }
7112
7113 fn pull(
7114 &self,
7115 owner: &str,
7116 repo: &str,
7117 number: i64,
7118 ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7119 MockGitHubClient.pull(owner, repo, number)
7120 }
7121
7122 fn pull_reviews(
7123 &self,
7124 owner: &str,
7125 repo: &str,
7126 number: i64,
7127 ) -> anyhow::Result<Vec<github::GitHubReview>> {
7128 MockGitHubClient.pull_reviews(owner, repo, number)
7129 }
7130
7131 fn pull_review_comments(
7132 &self,
7133 owner: &str,
7134 repo: &str,
7135 number: i64,
7136 ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7137 MockGitHubClient.pull_review_comments(owner, repo, number)
7138 }
7139 }
7140}