1pub mod ai;
2pub mod anchors;
3pub mod chunker;
4pub mod edges;
5pub mod git_history;
6pub mod github;
7pub mod parser;
8pub mod schema;
9pub mod symbols;
10pub mod walker;
11
12#[cfg(test)]
13mod anchor_tests;
14#[cfg(test)]
15mod parser_tests;
16
17use std::{
18 collections::{BTreeMap, BTreeSet},
19 fs,
20 path::{Path, PathBuf},
21 process::Command,
22 sync::{
23 atomic::{AtomicUsize, Ordering},
24 mpsc,
25 },
26 thread,
27 thread::JoinHandle,
28 time::{SystemTime, UNIX_EPOCH},
29};
30
31use gix::{
32 bstr::{BString, ByteSlice},
33 status::{UntrackedFiles, tree_index},
34};
35use rayon::prelude::*;
36use regex::Regex;
37use rusqlite::{OptionalExtension, params};
38use serde::Serialize;
39use sha2::{Digest, Sha256};
40use thiserror::Error;
41
42use crate::{
43 config::{Config, TargetKind},
44 index::{
45 ai::{LocalAiStatus, ModelInfo, ReconcilePlan, ReconcileReport},
46 anchors::{AnchorStatus, ChunkAnchor},
47 chunker::Chunk,
48 git_history::{
49 ChunkBlameSummary, CommitSearchHit, GitHistoryIndexStatus, PathHistoryItem,
50 QueryCommitHit, SymbolHistoryItem,
51 },
52 github::{GitHubEvidence, GitHubStatus, GitHubSyncReport, Papertrail},
53 symbols::Symbol,
54 },
55 language::Language,
56 query::graph_meta::{self, GraphMetaMode},
57 search::lexical::{SearchHit, SearchOptions},
58 storage::IndexConnection,
59 storage::StorageStatus,
60};
61
62#[derive(Debug)]
63pub struct IndexDatabase {
64 storage: IndexConnection,
65 pub active_commit_sha: String,
66 pub active_worktree_id: String,
67}
68
69#[derive(Debug, Clone)]
70pub enum IndexProgress {
71 Started {
72 database: PathBuf,
73 mode: IndexMode,
74 },
75 Discovering,
76 Discovered {
77 files: usize,
78 },
79 PreparingFile {
80 current: usize,
81 total: usize,
82 path: PathBuf,
83 language: Language,
84 kind: TargetKind,
85 },
86 IndexingFile {
87 current: usize,
88 total: usize,
89 path: PathBuf,
90 language: Language,
91 kind: TargetKind,
92 },
93 IndexingGitHistory,
94 RebuildingLogicalSymbols,
95 ResolvingGraph,
96 SyncingFts,
97 RebuildingFts,
98 Finished {
99 files: usize,
100 },
101}
102
103#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
104#[serde(rename_all = "snake_case")]
105pub enum IndexMode {
106 Changed,
107 Discover,
108 Full,
109}
110
111impl IndexMode {
112 pub fn label(self) -> &'static str {
113 match self {
114 Self::Changed => "changed files",
115 Self::Discover => "discovery",
116 Self::Full => "full rebuild",
117 }
118 }
119}
120
121#[derive(Debug, Serialize)]
122pub struct IndexStatus {
123 pub database: String,
124 pub exists: bool,
125 pub schema: schema::SchemaStatus,
126 pub git_commit: Option<String>,
127 pub git_dirty: Option<bool>,
128 pub indexed_at_ms: Option<i64>,
129 pub content_revision: String,
130 pub fts_synced_at_ms: Option<i64>,
131 pub fts_source_revision: Option<String>,
132 pub fts_dirty: bool,
133 pub fts_fresh: bool,
134 pub file_count_by_language: BTreeMap<String, u64>,
135 pub parser_failures: u64,
136 pub parser_failure_paths: Vec<ParserFailure>,
137 pub git_history: GitHistoryIndexStatus,
138 pub github: GitHubStatus,
139 pub local_ai: LocalAiStatus,
140}
141
142#[derive(Debug, Serialize)]
143pub struct HealIndexReport {
144 pub checked_files: u64,
145 pub healed_files: u64,
146 pub removed_files: u64,
147 pub skipped_files: u64,
148 pub fts_fresh: bool,
149 pub message: Option<String>,
150}
151
152#[derive(Debug, Serialize)]
153pub struct ParserFailure {
154 pub path: String,
155 pub language: String,
156 pub message: String,
157}
158
159#[derive(Debug, Serialize)]
160pub struct DiscoveryStatus {
161 pub discovered_files: usize,
162 pub indexed_files: usize,
163 pub unindexed_files: usize,
164 pub unindexed_source_files: usize,
165 pub changed_indexed_files: usize,
166 pub removed_indexed_files: usize,
167 pub unindexed_sample: Vec<String>,
168 pub warning: Option<String>,
169}
170
/// Upper bound on files healed automatically in one call.
// NOTE(review): usage is outside this view; presumably the `cap` reported by
// `IndexError::NeedsReindex` — confirm.
const MAX_AUTO_HEAL_FILES_PER_CALL: usize = 4;
/// Version tag of the graph index format.
// NOTE(review): presumably compared by `ensure_graph_index_current` and
// written by `mark_graph_index_current` — confirm.
const GRAPH_INDEX_VERSION: &str = "6";
173
174#[derive(Debug, Error)]
175pub enum IndexError {
176 #[error("Gone: indexed chunk {chunk_id} no longer exists")]
177 Gone { chunk_id: i64 },
178 #[error("StaleChunk: chunk {chunk_id} in {path} could not be relocated after reindex")]
179 StaleChunk { chunk_id: i64, path: String },
180 #[error("needs_reindex: {stale_files} stale files exceeds automatic heal cap {cap}")]
181 NeedsReindex { stale_files: usize, cap: usize },
182}
183
184impl IndexDatabase {
185 pub fn open(path: &Path) -> anyhow::Result<Self> {
186 Self::open_with_graph_check(path, true)
187 }
188
189 pub fn database_path(&self) -> &Path {
190 self.storage.database_path()
191 }
192
193 fn open_with_graph_check(path: &Path, check_graph: bool) -> anyhow::Result<Self> {
194 let mut storage = IndexConnection::open(path)?;
195 schema::check_compatible(storage.connection())?;
196 ai::ensure_model_manifest(storage.connection())?;
197 if let Some(root) = meta_for(storage.connection(), "source_root")? {
198 storage.set_source_root(PathBuf::from(root));
199 }
200 let db =
201 Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() };
202 if check_graph {
203 db.ensure_graph_index_current()?;
204 }
205 Ok(db)
206 }
207
208 pub fn open_config(config: &Config) -> anyhow::Result<Self> {
209 let mut db = Self::open_with_graph_check(&config.database, false)?;
210 db.storage.set_source_root(config.root.clone());
211 let (commit_sha, worktree_id) = resolve_git_context(&config.root);
212 db.set_context(&commit_sha, &worktree_id)?;
213 db.ensure_graph_index_current()?;
214 Ok(db)
215 }
216
217 pub fn migrate(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
218 Self::migrate_with_fastembed_cache(path, None)
219 }
220
221 fn migrate_with_fastembed_cache(
222 path: &Path,
223 fastembed_cache_dir: Option<&Path>,
224 ) -> anyhow::Result<schema::SchemaStatus> {
225 let storage = IndexConnection::open(path)?;
226 let status = schema::status(storage.connection())?;
227 match status.state {
228 schema::SchemaState::Newer | schema::SchemaState::Dirty => {
229 anyhow::bail!("{}", status.message);
230 },
231 schema::SchemaState::Compatible => {},
232 schema::SchemaState::Missing | schema::SchemaState::Older => {
233 schema::apply(storage.connection())?;
234 },
235 }
236 ai::ensure_model_manifest(storage.connection())?;
237 if let Some(fastembed_cache_dir) = fastembed_cache_dir {
238 ai::recover_cached_fastembed_model_from(storage.connection(), fastembed_cache_dir)?;
239 } else {
240 ai::recover_cached_fastembed_model(storage.connection())?;
241 }
242 schema::status(storage.connection())
243 }
244
245 pub fn migration_check(path: &Path) -> anyhow::Result<schema::SchemaStatus> {
246 let storage = IndexConnection::open(path)?;
247 schema::status(storage.connection())
248 }
249
250 fn create_or_migrate(path: &Path) -> anyhow::Result<Self> {
251 let mut storage = IndexConnection::open(path)?;
252 schema::apply(storage.connection())?;
253 ai::ensure_model_manifest(storage.connection())?;
254 if let Some(root) = meta_for(storage.connection(), "source_root")? {
255 storage.set_source_root(PathBuf::from(root));
256 }
257 Ok(Self { storage, active_commit_sha: String::new(), active_worktree_id: String::new() })
258 }
259
260 pub fn set_context(&mut self, commit_sha: &str, worktree_id: &str) -> anyhow::Result<()> {
261 self.active_commit_sha = commit_sha.to_string();
262 self.active_worktree_id = worktree_id.to_string();
263
264 let conn = self.storage.connection();
265 conn.execute_batch(
266 "
267 CREATE TEMP TABLE IF NOT EXISTS connection_context(key TEXT PRIMARY KEY, value TEXT);
268 ",
269 )?;
270
271 let mut stmt = conn.prepare(
272 "INSERT OR REPLACE INTO temp.connection_context(key, value) VALUES (?1, ?2)",
273 )?;
274 stmt.execute(params!["commit_sha", commit_sha])?;
275 stmt.execute(params!["worktree_id", worktree_id])?;
276
277 conn.execute_batch("
278 DROP VIEW IF EXISTS temp.files;
279 CREATE TEMP VIEW temp.files AS
280 SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
281 FROM main.files
282 WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id') AND worktree_id != '' AND kind != 'deleted'
283 UNION ALL
284 SELECT id, path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id
285 FROM main.files
286 WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
287 AND commit_sha != ''
288 AND path NOT IN (
289 SELECT path FROM main.files
290 WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
291 AND worktree_id != ''
292 );
293 ")?;
294
295 Ok(())
296 }
297
298 pub fn rebuild(config: &Config) -> anyhow::Result<Self> {
299 Self::rebuild_with_progress(config, |_| {})
300 }
301
302 pub fn rebuild_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
303 where
304 F: FnMut(IndexProgress),
305 {
306 progress(IndexProgress::Started {
307 database: config.database.clone(),
308 mode: IndexMode::Full,
309 });
310 let mut db = Self::create_or_migrate(&config.database)?;
311 let (commit_sha, worktree_id) = resolve_git_context(&config.root);
312 db.set_context(&commit_sha, &worktree_id)?;
313 progress(IndexProgress::IndexingGitHistory);
314 let mut git_history = Some(spawn_git_history_prepare(&config.root));
315 let result = (|| -> anyhow::Result<()> {
316 db.storage.execute_batch("BEGIN TRANSACTION")?;
317 db.clear_full_rebuild_tables()?;
318 db.set_meta("source_root", &config.root.display().to_string())?;
319 db.storage.set_source_root(config.root.clone());
320 db.write_git_meta(&config.root)?;
321 let indexed = db.index_targets_with_progress(config, &mut progress)?;
322 db.apply_prepared_git_history(
323 &config.root,
324 git_history
325 .take()
326 .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
327 )?;
328 progress(IndexProgress::RebuildingLogicalSymbols);
329 db.rebuild_logical_symbols()?;
330 progress(IndexProgress::ResolvingGraph);
331 db.resolve_edges()?;
332 db.mark_graph_index_current()?;
333 progress(IndexProgress::RebuildingFts);
334 db.rebuild_fts()?;
335 db.set_meta("indexed_at_ms", &now_ms().to_string())?;
336 db.storage.execute_batch("COMMIT")?;
337 progress(IndexProgress::Finished { files: indexed });
338 Ok(())
339 })();
340 if result.is_err() {
341 if let Some(handle) = git_history.take() {
342 let _ = join_git_history_prepare(handle);
343 }
344 let _ = db.storage.execute_batch("ROLLBACK");
345 }
346 result?;
347 Ok(db)
348 }
349
350 fn clear_full_rebuild_tables(&self) -> anyhow::Result<()> {
351 self.storage.execute_batch(
352 "
353 CREATE TEMP TABLE IF NOT EXISTS full_rebuild_file_ids(id INTEGER PRIMARY KEY);
354 DELETE FROM temp.full_rebuild_file_ids;
355 INSERT OR IGNORE INTO temp.full_rebuild_file_ids(id)
356 SELECT id
357 FROM main.files
358 WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
359 AND worktree_id != '';
360 INSERT OR IGNORE INTO temp.full_rebuild_file_ids(id)
361 SELECT id
362 FROM main.files
363 WHERE commit_sha = (SELECT value FROM temp.connection_context WHERE key = 'commit_sha')
364 AND commit_sha != ''
365 AND path NOT IN (
366 SELECT path FROM main.files
367 WHERE worktree_id = (SELECT value FROM temp.connection_context WHERE key = 'worktree_id')
368 AND worktree_id != ''
369 );
370
371 UPDATE main.edges
372 SET to_symbol_id = NULL,
373 target_start_line = NULL,
374 target_end_line = NULL,
375 resolution = 'unresolved'
376 WHERE to_symbol_id IN (
377 SELECT symbols.id
378 FROM main.symbols
379 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
380 );
381 DELETE FROM main.edges
382 WHERE source_file_id IN (SELECT id FROM temp.full_rebuild_file_ids)
383 OR from_symbol_id IN (
384 SELECT symbols.id
385 FROM main.symbols
386 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
387 );
388
389 DELETE FROM main.logical_symbol_members
390 WHERE symbol_id IN (
391 SELECT symbols.id
392 FROM main.symbols
393 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
394 );
395 DELETE FROM main.logical_symbols
396 WHERE id NOT IN (
397 SELECT logical_symbol_id FROM main.logical_symbol_members
398 );
399 DELETE FROM main.symbol_facts
400 WHERE symbol_id IN (
401 SELECT symbols.id
402 FROM main.symbols
403 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = symbols.file_id
404 );
405 DELETE FROM main.chunk_fts
406 WHERE rowid IN (
407 SELECT chunks.id
408 FROM main.chunks
409 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
410 );
411 DELETE FROM main.chunk_summaries
412 WHERE chunk_id IN (
413 SELECT chunks.id
414 FROM main.chunks
415 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
416 );
417 DELETE FROM main.chunk_embeddings
418 WHERE chunk_id IN (
419 SELECT chunks.id
420 FROM main.chunks
421 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
422 );
423 DELETE FROM main.git_chunk_blame
424 WHERE chunk_id IN (
425 SELECT chunks.id
426 FROM main.chunks
427 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
428 );
429 DELETE FROM main.docs
430 WHERE chunk_id IN (
431 SELECT chunks.id
432 FROM main.chunks
433 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = chunks.file_id
434 );
435 DELETE FROM main.parser_failures
436 WHERE path IN (
437 SELECT path
438 FROM main.files
439 JOIN temp.full_rebuild_file_ids ON full_rebuild_file_ids.id = files.id
440 );
441 DELETE FROM main.symbols
442 WHERE file_id IN (SELECT id FROM temp.full_rebuild_file_ids);
443 DELETE FROM main.chunks
444 WHERE file_id IN (SELECT id FROM temp.full_rebuild_file_ids);
445 DELETE FROM main.files
446 WHERE id IN (SELECT id FROM temp.full_rebuild_file_ids);
447 DELETE FROM temp.full_rebuild_file_ids;
448 ",
449 )?;
450 Ok(())
451 }
452
453 pub fn index_changed(config: &Config) -> anyhow::Result<Self> {
454 Self::index_changed_with_progress(config, |_| {})
455 }
456
457 pub fn index_changed_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
458 where
459 F: FnMut(IndexProgress),
460 {
461 Self::index_incremental_with_progress(config, IndexMode::Changed, &mut progress)
462 }
463
464 pub fn index_discover(config: &Config) -> anyhow::Result<Self> {
465 Self::index_discover_with_progress(config, |_| {})
466 }
467
468 pub fn index_discover_with_progress<F>(config: &Config, mut progress: F) -> anyhow::Result<Self>
469 where
470 F: FnMut(IndexProgress),
471 {
472 Self::index_incremental_with_progress(config, IndexMode::Discover, &mut progress)
473 }
474
475 fn index_incremental_with_progress<F>(
476 config: &Config,
477 mode: IndexMode,
478 progress: &mut F,
479 ) -> anyhow::Result<Self>
480 where
481 F: FnMut(IndexProgress),
482 {
483 if !config.database.exists() {
484 return Self::rebuild_with_progress(config, progress);
485 }
486 if Self::migration_check(&config.database)?.state == schema::SchemaState::Missing {
487 return Self::rebuild_with_progress(config, progress);
488 }
489
490 let mut db = Self::open(&config.database)?;
491 let (commit_sha, worktree_id) = resolve_git_context(&config.root);
492 db.set_context(&commit_sha, &worktree_id)?;
493 if db.indexed_file_count()? == 0 {
494 return Self::rebuild_with_progress(config, progress);
495 }
496 progress(IndexProgress::Started { database: config.database.clone(), mode });
497 progress(IndexProgress::IndexingGitHistory);
498 let mut git_history = Some(spawn_git_history_prepare(&config.root));
499 let result = (|| -> anyhow::Result<()> {
500 db.storage.execute_batch("BEGIN TRANSACTION")?;
501 db.set_meta("source_root", &config.root.display().to_string())?;
502 db.storage.set_source_root(config.root.clone());
503 db.write_git_meta(&config.root)?;
504 let indexed = match mode {
505 IndexMode::Changed => db.index_changed_files_with_progress(config, progress)?,
506 IndexMode::Discover => db.index_discovered_files_with_progress(config, progress)?,
507 IndexMode::Full => unreachable!("full mode is handled by rebuild_with_progress"),
508 };
509 db.apply_prepared_git_history(
510 &config.root,
511 git_history
512 .take()
513 .ok_or_else(|| anyhow::anyhow!("git history preparation was already used"))?,
514 )?;
515 if indexed > 0 {
516 progress(IndexProgress::RebuildingLogicalSymbols);
517 db.rebuild_logical_symbols()?;
518 progress(IndexProgress::ResolvingGraph);
519 db.resolve_edges()?;
520 db.mark_graph_index_current()?;
521 progress(IndexProgress::SyncingFts);
522 db.sync_fts()?;
523 }
524 db.set_meta("indexed_at_ms", &now_ms().to_string())?;
525 db.storage.execute_batch("COMMIT")?;
526 progress(IndexProgress::Finished { files: indexed });
527 Ok(())
528 })();
529 if result.is_err() {
530 if let Some(handle) = git_history.take() {
531 let _ = join_git_history_prepare(handle);
532 }
533 let _ = db.storage.execute_batch("ROLLBACK");
534 }
535 result?;
536 Ok(db)
537 }
538
539 pub fn index_targets(&self, config: &Config) -> anyhow::Result<()> {
540 self.index_targets_with_progress(config, &mut |_| {})?;
541 Ok(())
542 }
543
544 fn index_targets_with_progress<F>(
545 &self,
546 config: &Config,
547 progress: &mut F,
548 ) -> anyhow::Result<usize>
549 where
550 F: FnMut(IndexProgress),
551 {
552 progress(IndexProgress::Discovering);
553 let files = collect_index_files(config)?;
554 let changes = git_changed_paths(&config.root).unwrap_or_default();
555 let files = self.assign_file_scopes(files, &changes);
556 progress(IndexProgress::Discovered { files: files.len() });
557
558 let prepared = prepare_files_with_progress(&files, progress)?;
559 for (index, prepared_file) in prepared.iter().enumerate() {
560 let current = index + 1;
561 if should_report_file_progress(current, files.len()) {
562 progress(IndexProgress::IndexingFile {
563 current,
564 total: files.len(),
565 path: prepared_file.file.relative_path.clone(),
566 language: prepared_file.file.language,
567 kind: prepared_file.file.kind,
568 });
569 }
570 self.insert_prepared_file(prepared_file)?;
571 }
572
573 Ok(files.len())
574 }
575
576 fn index_changed_files_with_progress<F>(
577 &self,
578 config: &Config,
579 progress: &mut F,
580 ) -> anyhow::Result<usize>
581 where
582 F: FnMut(IndexProgress),
583 {
584 progress(IndexProgress::Discovering);
585 let changes = git_changed_paths(&config.root)?;
586 let files = collect_changed_index_files(config, &changes)?;
587 let files = self.assign_file_scopes(files, &changes);
588 self.apply_incremental_file_plan(files, changes.deleted, progress)
589 }
590
591 fn index_discovered_files_with_progress<F>(
592 &self,
593 config: &Config,
594 progress: &mut F,
595 ) -> anyhow::Result<usize>
596 where
597 F: FnMut(IndexProgress),
598 {
599 progress(IndexProgress::Discovering);
600 let plan = discovery_plan(self.storage.connection(), config)?;
601 let changes = git_changed_paths(&config.root).unwrap_or_default();
602 let files = self.assign_file_scopes(plan.files, &changes);
603 self.apply_incremental_file_plan(files, plan.deleted, progress)
604 }
605
606 fn assign_file_scopes(
607 &self,
608 files: Vec<IndexFile>,
609 changes: &GitChangedPaths,
610 ) -> Vec<IndexFile> {
611 let has_base_commit = !self.active_commit_sha.is_empty();
612 files
613 .into_iter()
614 .map(|mut file| {
615 if !has_base_commit || changes.changed.contains(&file.relative_path) {
616 file.commit_sha.clear();
617 file.worktree_id.clone_from(&self.active_worktree_id);
618 } else {
619 file.commit_sha.clone_from(&self.active_commit_sha);
620 file.worktree_id.clear();
621 }
622 file
623 })
624 .collect()
625 }
626
627 fn apply_incremental_file_plan<F>(
628 &self,
629 files: Vec<IndexFile>,
630 deleted: BTreeSet<PathBuf>,
631 progress: &mut F,
632 ) -> anyhow::Result<usize>
633 where
634 F: FnMut(IndexProgress),
635 {
636 progress(IndexProgress::Discovered { files: files.len() });
637
638 let deleted_count = deleted.len();
639 for path in deleted {
640 self.mark_file_deleted(&path)?;
641 }
642
643 let prepared = prepare_files_with_progress(&files, progress)?;
644 for (index, prepared_file) in prepared.iter().enumerate() {
645 let current = index + 1;
646 if should_report_file_progress(current, files.len()) {
647 progress(IndexProgress::IndexingFile {
648 current,
649 total: files.len(),
650 path: prepared_file.file.relative_path.clone(),
651 language: prepared_file.file.language,
652 kind: prepared_file.file.kind,
653 });
654 }
655 self.remove_file_in_scope(
656 &prepared_file.file.relative_path,
657 &prepared_file.file.commit_sha,
658 &prepared_file.file.worktree_id,
659 )?;
660 self.insert_prepared_file(prepared_file)?;
661 }
662
663 Ok(files.len() + deleted_count)
664 }
665
666 pub fn status(&self, database: &Path) -> anyhow::Result<IndexStatus> {
667 let mut counts = BTreeMap::new();
668 let mut stmt = self
669 .storage
670 .connection()
671 .prepare("SELECT language, COUNT(*) FROM files GROUP BY language ORDER BY language")?;
672 let rows =
673 stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)))?;
674 for row in rows {
675 let (language, count) = row?;
676 counts.insert(language, u64::try_from(count).unwrap_or(0));
677 }
678
679 let content_revision = self.content_revision()?;
680 let fts_source_revision = self.meta("fts_source_revision")?;
681 let fts_dirty = self.fts_dirty()?;
682
683 Ok(IndexStatus {
684 database: database.display().to_string(),
685 exists: database.exists(),
686 schema: schema::status(self.storage.connection())?,
687 git_commit: self.meta("git_commit")?,
688 git_dirty: self.meta("git_dirty")?.map(|value| value == "true"),
689 indexed_at_ms: self.meta("indexed_at_ms")?.and_then(|value| value.parse::<i64>().ok()),
690 content_revision: content_revision.clone(),
691 fts_synced_at_ms: self
692 .meta("fts_synced_at_ms")?
693 .and_then(|value| value.parse::<i64>().ok()),
694 fts_dirty,
695 fts_fresh: !fts_dirty
696 && fts_source_revision.as_deref() == Some(content_revision.as_str()),
697 fts_source_revision,
698 file_count_by_language: counts,
699 parser_failures: self.parser_failure_count()?,
700 parser_failure_paths: self.parser_failure_paths()?,
701 git_history: self.git_history_status()?,
702 github: self.github_status()?,
703 local_ai: self.local_ai_status()?,
704 })
705 }
706
707 pub fn storage_status(&self) -> anyhow::Result<StorageStatus> {
708 self.storage.status()
709 }
710
711 pub fn discovery_status(&self, config: &Config) -> anyhow::Result<DiscoveryStatus> {
712 let plan = discovery_plan(self.storage.connection(), config)?;
713 let unindexed_source_files =
714 plan.unindexed.iter().filter(|file| file.kind == TargetKind::Source).count();
715 let unindexed_sample =
716 plan.unindexed.iter().take(10).map(|file| path_string(&file.relative_path)).collect();
717 let warning = (unindexed_source_files > 0).then(|| {
718 format!(
719 "{unindexed_source_files} unindexed source files detected. Run `rag-rat index --full` or `rag-rat index --discover`."
720 )
721 });
722 Ok(DiscoveryStatus {
723 discovered_files: plan.discovered_files,
724 indexed_files: plan.indexed_files,
725 unindexed_files: plan.unindexed.len(),
726 unindexed_source_files,
727 changed_indexed_files: plan.changed.len(),
728 removed_indexed_files: plan.deleted.len(),
729 unindexed_sample,
730 warning,
731 })
732 }
733
734 pub fn search(
735 &self,
736 query: &str,
737 limit: u32,
738 include_generated: bool,
739 ) -> anyhow::Result<Vec<SearchHit>> {
740 self.search_with_graph_meta(query, limit, include_generated, GraphMetaMode::Compact, 3)
741 }
742
743 pub fn search_explain(
744 &self,
745 query: &str,
746 limit: u32,
747 include_generated: bool,
748 ) -> anyhow::Result<Vec<SearchHit>> {
749 self.search_explain_with_graph_meta(
750 query,
751 limit,
752 include_generated,
753 GraphMetaMode::Compact,
754 3,
755 )
756 }
757
758 pub fn search_with_graph_meta(
759 &self,
760 query: &str,
761 limit: u32,
762 include_generated: bool,
763 graph_mode: GraphMetaMode,
764 graph_limit: u32,
765 ) -> anyhow::Result<Vec<SearchHit>> {
766 self.search_with_graph_meta_options(
767 query,
768 limit,
769 include_generated,
770 graph_mode,
771 graph_limit,
772 SearchOptions::default(),
773 )
774 }
775
776 pub fn search_with_graph_meta_options(
777 &self,
778 query: &str,
779 limit: u32,
780 include_generated: bool,
781 graph_mode: GraphMetaMode,
782 graph_limit: u32,
783 options: SearchOptions,
784 ) -> anyhow::Result<Vec<SearchHit>> {
785 self.ensure_fts_fresh()?;
786 let mut hits =
787 self.search_with_heal(query, limit, include_generated, true, false, options)?;
788 graph_meta::attach_to_search_hits(
789 self.storage.connection(),
790 &mut hits,
791 graph_mode,
792 graph_limit,
793 )?;
794 Ok(hits)
795 }
796
797 pub fn search_explain_with_graph_meta(
798 &self,
799 query: &str,
800 limit: u32,
801 include_generated: bool,
802 graph_mode: GraphMetaMode,
803 graph_limit: u32,
804 ) -> anyhow::Result<Vec<SearchHit>> {
805 self.search_explain_with_graph_meta_options(
806 query,
807 limit,
808 include_generated,
809 graph_mode,
810 graph_limit,
811 SearchOptions::default(),
812 )
813 }
814
815 pub fn search_explain_with_graph_meta_options(
816 &self,
817 query: &str,
818 limit: u32,
819 include_generated: bool,
820 graph_mode: GraphMetaMode,
821 graph_limit: u32,
822 options: SearchOptions,
823 ) -> anyhow::Result<Vec<SearchHit>> {
824 self.ensure_fts_fresh()?;
825 let mut hits =
826 self.search_with_heal(query, limit, include_generated, true, true, options)?;
827 graph_meta::attach_to_search_hits(
828 self.storage.connection(),
829 &mut hits,
830 graph_mode,
831 graph_limit,
832 )?;
833 Ok(hits)
834 }
835
836 pub fn symbols(
837 &self,
838 name: &str,
839 language: Option<Language>,
840 limit: u32,
841 ) -> anyhow::Result<Vec<crate::query::symbol::SymbolHit>> {
842 crate::query::symbol::lookup(self.storage.connection(), name, language, limit)
843 }
844
845 pub fn symbol_candidates(
846 &self,
847 selector: &crate::query::symbol::SymbolSelector,
848 ) -> anyhow::Result<crate::query::symbol::SymbolLookup> {
849 crate::query::symbol::lookup_candidates(self.storage.connection(), selector)
850 }
851
852 pub fn select_symbol(
853 &self,
854 selector: &crate::query::symbol::SymbolSelector,
855 ) -> anyhow::Result<
856 Result<Option<crate::query::symbol::SymbolHit>, crate::query::symbol::SymbolDisambiguation>,
857 > {
858 crate::query::symbol::select_one(self.storage.connection(), selector)
859 }
860
861 pub fn read_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
862 self.read_chunk_with_graph_and_memories(chunk_id, GraphMetaMode::Full, 20, true)
863 }
864
865 pub fn read_chunk_with_graph(
866 &self,
867 chunk_id: i64,
868 graph_mode: GraphMetaMode,
869 graph_limit: u32,
870 ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
871 self.read_chunk_with_graph_and_memories(chunk_id, graph_mode, graph_limit, false)
872 }
873
874 pub fn read_chunk_with_graph_and_memories(
875 &self,
876 chunk_id: i64,
877 graph_mode: GraphMetaMode,
878 graph_limit: u32,
879 include_memories: bool,
880 ) -> anyhow::Result<Option<crate::query::ReadChunk>> {
881 let Some(mut chunk) = self.read_chunk_current(chunk_id)? else {
882 return Ok(None);
883 };
884 graph_meta::attach_to_read_chunk(
885 self.storage.connection(),
886 &mut chunk,
887 graph_mode,
888 graph_limit,
889 )?;
890 if include_memories {
891 chunk.memories =
892 crate::query::memory::memories_for_chunk(self.storage.connection(), chunk_id, 20)?;
893 }
894 Ok(Some(chunk))
895 }
896
897 fn read_chunk_current(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
898 let Some(mut chunk) = crate::query::read_chunk(self.storage.connection(), chunk_id)? else {
899 return Ok(None);
900 };
901 let Some(root) = self.storage.source_root() else {
902 return Ok(Some(chunk));
903 };
904 let source_path = root.join(&chunk.path);
905 let current_text = match fs::read_to_string(&source_path) {
906 Ok(text) => text,
907 Err(_) => {
908 let path = chunk.path.clone();
909 self.mark_file_deleted(Path::new(&path))?;
910 self.sync_fts()?;
911 anyhow::bail!(IndexError::Gone { chunk_id });
912 },
913 };
914 let anchor = self.chunk_anchor(chunk_id)?;
915 let status = anchors::validate(
916 &chunk.text,
917 usize::try_from(chunk.start_line).unwrap_or(1),
918 usize::try_from(chunk.end_line).unwrap_or(1),
919 &anchor,
920 ¤t_text,
921 );
922 match status {
923 AnchorStatus::Exact => {
924 if let Some(text) = anchors::slice_lines(
925 ¤t_text,
926 usize::try_from(chunk.start_line).unwrap_or(1),
927 usize::try_from(chunk.end_line).unwrap_or(1),
928 ) {
929 chunk.text = text;
930 }
931 Ok(Some(chunk))
932 },
933 AnchorStatus::Relocated { start_line, end_line, text } => {
934 chunk.start_line = i64::try_from(start_line)?;
935 chunk.end_line = i64::try_from(end_line)?;
936 chunk.text = text;
937 Ok(Some(chunk))
938 },
939 AnchorStatus::Stale => {
940 self.heal_file(Path::new(&chunk.path))?;
941 self.sync_fts()?;
942 let healed = crate::query::read_chunk(self.storage.connection(), chunk_id)?;
943 match healed {
944 Some(chunk) => Ok(Some(chunk)),
945 None => anyhow::bail!(IndexError::StaleChunk { chunk_id, path: chunk.path }),
946 }
947 },
948 }
949 }
950
951 pub fn search_hash_baseline(
952 &self,
953 query: &str,
954 limit: u32,
955 include_generated: bool,
956 ) -> anyhow::Result<Vec<SearchHit>> {
957 self.ensure_fts_fresh()?;
958 crate::search::lexical::search_hash_baseline(
959 self.storage.connection(),
960 query,
961 limit,
962 include_generated,
963 )
964 }
965
966 pub fn docs_for_symbol(&self, symbol: &str, limit: u32) -> anyhow::Result<Vec<SearchHit>> {
967 self.search(symbol, limit, true)
968 }
969
970 pub fn docs_for_selected_symbol(
971 &self,
972 symbol: &crate::query::symbol::SymbolHit,
973 limit: u32,
974 ) -> anyhow::Result<Vec<SearchHit>> {
975 let mut hits = self.local_symbol_context_hits(symbol, limit)?;
976 hits.extend(self.search(&symbol.name, limit.saturating_mul(4).max(limit), true)?);
977 rank_docs_for_symbol(symbol, &mut hits);
978 dedupe_search_hits(&mut hits);
979 hits.truncate(usize::try_from(limit).unwrap_or(usize::MAX));
980 Ok(hits)
981 }
982
983 pub fn commit_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<CommitSearchHit>> {
984 git_history::commit_search(self.storage.connection(), query, limit)
985 }
986
987 pub fn git_history_for_path(
988 &self,
989 path: &str,
990 limit: u32,
991 ) -> anyhow::Result<Vec<PathHistoryItem>> {
992 git_history::history_for_path(self.storage.connection(), path, limit)
993 }
994
995 pub fn git_history_for_symbol(
996 &self,
997 symbol: &str,
998 language: Option<Language>,
999 limit: u32,
1000 ) -> anyhow::Result<Vec<SymbolHistoryItem>> {
1001 let symbols = self.symbols(symbol, language, limit)?;
1002 let per_symbol_limit = limit.max(1);
1003 let mut out = Vec::new();
1004 for symbol_hit in symbols {
1005 for commit in self.git_history_for_path(&symbol_hit.path, per_symbol_limit)? {
1006 out.push(SymbolHistoryItem {
1007 symbol: symbol_hit.name.clone(),
1008 qualified_name: symbol_hit.qualified_name.clone(),
1009 path: symbol_hit.path.clone(),
1010 start_byte: symbol_hit.start_byte,
1011 end_byte: symbol_hit.end_byte,
1012 commit,
1013 evidence_kind: "historical",
1014 });
1015 if out.len() >= usize::try_from(limit).unwrap_or(usize::MAX) {
1016 return Ok(out);
1017 }
1018 }
1019 }
1020 Ok(out)
1021 }
1022
1023 pub fn commits_touching_query(
1024 &self,
1025 query: &str,
1026 limit: u32,
1027 ) -> anyhow::Result<Vec<QueryCommitHit>> {
1028 let current_hits = self.search(query, limit, true)?;
1029 git_history::commits_touching_query(self.storage.connection(), query, limit, ¤t_hits)
1030 }
1031
1032 pub fn git_blame_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<ChunkBlameSummary>> {
1033 let Some(chunk) = self.read_chunk(chunk_id)? else {
1034 return Ok(None);
1035 };
1036 let source_text_hash = git_history::source_text_hash(&chunk.text);
1037 if let Some(cached) =
1038 git_history::cached_blame(self.storage.connection(), chunk_id, &source_text_hash)?
1039 {
1040 return Ok(Some(cached));
1041 }
1042 let Some(root) = self.storage.source_root() else {
1043 return Ok(Some(ChunkBlameSummary {
1044 chunk_id,
1045 path: chunk.path,
1046 start_line: chunk.start_line,
1047 end_line: chunk.end_line,
1048 source_text_hash,
1049 line_count: 0,
1050 dominant_commit: None,
1051 dominant_commit_lines: 0,
1052 newest_commit: None,
1053 newest_commit_time_s: None,
1054 oldest_commit: None,
1055 oldest_commit_time_s: None,
1056 commit_counts: BTreeMap::new(),
1057 evidence_kind: "historical",
1058 }));
1059 };
1060 let blame_lines =
1061 git_history::blame_lines(root, &chunk.path, chunk.start_line, chunk.end_line);
1062 let mut counts = BTreeMap::<String, i64>::new();
1063 let mut newest = None::<(String, i64)>;
1064 let mut oldest = None::<(String, i64)>;
1065 for line in &blame_lines {
1066 *counts.entry(line.commit.clone()).or_default() += 1;
1067 if let Some(time) = line.author_time_s {
1068 if newest.as_ref().is_none_or(|(_, newest_time)| time > *newest_time) {
1069 newest = Some((line.commit.clone(), time));
1070 }
1071 if oldest.as_ref().is_none_or(|(_, oldest_time)| time < *oldest_time) {
1072 oldest = Some((line.commit.clone(), time));
1073 }
1074 }
1075 }
1076 let dominant = counts
1077 .iter()
1078 .max_by_key(|(commit, count)| (*count, *commit))
1079 .map(|(commit, count)| (commit.clone(), *count));
1080 let summary = ChunkBlameSummary {
1081 chunk_id,
1082 path: chunk.path,
1083 start_line: chunk.start_line,
1084 end_line: chunk.end_line,
1085 source_text_hash,
1086 line_count: i64::try_from(blame_lines.len()).unwrap_or(i64::MAX),
1087 dominant_commit: dominant.as_ref().map(|(commit, _)| commit.clone()),
1088 dominant_commit_lines: dominant.map(|(_, count)| count).unwrap_or(0),
1089 newest_commit: newest.as_ref().map(|(commit, _)| commit.clone()),
1090 newest_commit_time_s: newest.as_ref().map(|(_, time)| *time),
1091 oldest_commit: oldest.as_ref().map(|(commit, _)| commit.clone()),
1092 oldest_commit_time_s: oldest.as_ref().map(|(_, time)| *time),
1093 commit_counts: counts,
1094 evidence_kind: "historical",
1095 };
1096 git_history::store_blame(self.storage.connection(), &summary)?;
1097 Ok(Some(summary))
1098 }
1099
1100 pub fn github_sync_from_refs(&self, offline: bool) -> anyhow::Result<GitHubSyncReport> {
1101 self.github_sync_from_refs_with_progress(offline, |_| {})
1102 }
1103
1104 pub fn github_sync_from_refs_with_progress(
1105 &self,
1106 offline: bool,
1107 progress: impl FnMut(github::GitHubSyncProgress),
1108 ) -> anyhow::Result<GitHubSyncReport> {
1109 let Some(root) = self.storage.source_root() else {
1110 anyhow::bail!("index has no source_root metadata; rebuild required");
1111 };
1112 if offline {
1113 github::sync_from_refs::<github::GhCliGitHubClient>(
1114 self.storage.connection(),
1115 root,
1116 None,
1117 true,
1118 )
1119 } else {
1120 let client = github::GhCliGitHubClient;
1121 github::sync_from_refs_with_progress(
1122 self.storage.connection(),
1123 root,
1124 Some(&client),
1125 false,
1126 progress,
1127 )
1128 }
1129 }
1130
1131 pub fn github_sync_issue(
1132 &self,
1133 issue_ref: &str,
1134 offline: bool,
1135 ) -> anyhow::Result<GitHubSyncReport> {
1136 if offline {
1137 github::sync_issue::<github::GhCliGitHubClient>(
1138 self.storage.connection(),
1139 issue_ref,
1140 None,
1141 true,
1142 )
1143 } else {
1144 let client = github::GhCliGitHubClient;
1145 github::sync_issue(self.storage.connection(), issue_ref, Some(&client), false)
1146 }
1147 }
1148
1149 pub fn github_issue_search(
1150 &self,
1151 query: &str,
1152 limit: u32,
1153 ) -> anyhow::Result<Vec<GitHubEvidence>> {
1154 github::issue_search(self.storage.connection(), query, limit)
1155 }
1156
1157 pub fn rationale_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<GitHubEvidence>> {
1158 github::rationale_search(self.storage.connection(), query, limit)
1159 }
1160
1161 pub fn github_refs_for_path(
1162 &self,
1163 path: &str,
1164 limit: u32,
1165 ) -> anyhow::Result<Vec<github::GitHubRef>> {
1166 github::refs_for_path(self.storage.connection(), path, limit)
1167 }
1168
1169 pub fn github_sync_status(&self) -> anyhow::Result<GitHubStatus> {
1170 self.github_status()
1171 }
1172
1173 pub fn papertrail_for_chunk(
1174 &self,
1175 chunk_id: i64,
1176 limit: u32,
1177 ) -> anyhow::Result<Option<Papertrail>> {
1178 let Some(chunk) = self.read_chunk(chunk_id)? else {
1179 return Ok(None);
1180 };
1181 Ok(Some(github::papertrail_for_chunk(self.storage.connection(), &chunk, limit)?))
1182 }
1183
1184 pub fn papertrail_for_symbol(
1185 &self,
1186 symbol: &str,
1187 language: Option<Language>,
1188 limit: u32,
1189 ) -> anyhow::Result<Option<Papertrail>> {
1190 let Some(symbol) = self.symbols(symbol, language, limit)?.into_iter().next() else {
1191 return Ok(None);
1192 };
1193 Ok(Some(github::papertrail_for_symbol(self.storage.connection(), &symbol, limit)?))
1194 }
1195
1196 pub fn papertrail_for_selected_symbol(
1197 &self,
1198 symbol: &crate::query::symbol::SymbolHit,
1199 limit: u32,
1200 ) -> anyhow::Result<Papertrail> {
1201 github::papertrail_for_symbol(self.storage.connection(), symbol, limit)
1202 }
1203
1204 pub fn papertrail_for_commit(
1205 &self,
1206 commit_hash: &str,
1207 limit: u32,
1208 ) -> anyhow::Result<Papertrail> {
1209 github::papertrail_for_commit(self.storage.connection(), commit_hash, limit)
1210 }
1211
1212 pub fn local_ai_status(&self) -> anyhow::Result<LocalAiStatus> {
1213 ai::status(self.storage.connection())
1214 }
1215
1216 pub fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
1217 ai::models(self.storage.connection())
1218 }
1219
1220 pub fn install_model(&self, model_id: &str) -> anyhow::Result<ModelInfo> {
1221 ai::install_model(self.storage.connection(), model_id)
1222 }
1223
1224 pub fn reconcile(
1225 &self,
1226 limit: Option<u32>,
1227 batch_size: Option<u32>,
1228 ) -> anyhow::Result<ReconcileReport> {
1229 ai::reconcile(self.storage.connection(), limit, batch_size)
1230 }
1231
1232 pub fn reconcile_plan(&self) -> anyhow::Result<ReconcilePlan> {
1233 ai::reconcile_plan(self.storage.connection())
1234 }
1235
1236 pub fn reconcile_with_progress(
1237 &self,
1238 limit: Option<u32>,
1239 batch_size: Option<u32>,
1240 force: bool,
1241 progress: impl FnMut(ai::ReconcileProgress),
1242 ) -> anyhow::Result<ReconcileReport> {
1243 ai::reconcile_with_progress(self.storage.connection(), limit, batch_size, force, progress)
1244 }
1245
1246 pub fn reconcile_with_options_progress(
1247 &self,
1248 options: ai::ReconcileOptions,
1249 progress: impl FnMut(ai::ReconcileProgress),
1250 ) -> anyhow::Result<ReconcileReport> {
1251 ai::reconcile_with_options_progress(self.storage.connection(), options, progress)
1252 }
1253
1254 pub fn current_embedding_count(&self, model_id: &str) -> anyhow::Result<u64> {
1255 ai::current_embedding_count(self.storage.connection(), model_id)
1256 }
1257
1258 pub fn heal_index(&self, limit: Option<u32>) -> anyhow::Result<HealIndexReport> {
1259 let Some(root) = self.storage.source_root() else {
1260 anyhow::bail!("heal_index requires source_root metadata; run `rag-rat index` first");
1261 };
1262 let indexed_files = self.indexed_files()?;
1263 let max_repairs = limit.map(usize::try_from).transpose()?.unwrap_or(usize::MAX);
1264 let mut report = HealIndexReport {
1265 checked_files: 0,
1266 healed_files: 0,
1267 removed_files: 0,
1268 skipped_files: 0,
1269 fts_fresh: false,
1270 message: None,
1271 };
1272
1273 for file in indexed_files {
1274 report.checked_files += 1;
1275 let path = Path::new(&file.path);
1276 let full_path = root.join(path);
1277 let Ok(text) = fs::read_to_string(&full_path) else {
1278 if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1279 >= max_repairs
1280 {
1281 report.message =
1282 Some("limit reached; rerun heal_index to continue".to_string());
1283 break;
1284 }
1285 self.mark_file_deleted(path)?;
1286 report.removed_files += 1;
1287 continue;
1288 };
1289 let sha256 = hex_sha256(text.as_bytes());
1290 if sha256 == file.sha256 {
1291 report.skipped_files += 1;
1292 continue;
1293 }
1294 if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
1295 >= max_repairs
1296 {
1297 report.message = Some("limit reached; rerun heal_index to continue".to_string());
1298 break;
1299 }
1300 self.heal_file(path)?;
1301 report.healed_files += 1;
1302 }
1303
1304 if report.healed_files > 0 || report.removed_files > 0 {
1305 self.sync_fts()?;
1306 } else {
1307 self.ensure_fts_fresh()?;
1308 }
1309 report.fts_fresh = !self.fts_dirty()?;
1310 Ok(report)
1311 }
1312
1313 pub fn ffi_surface(&self, limit: u32) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1314 crate::query::impact::ffi_surface(self.storage.connection(), limit)
1315 }
1316
1317 pub fn find_callers(
1318 &self,
1319 symbol: &str,
1320 limit: u32,
1321 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1322 crate::query::graph::traverse(self.storage.connection(), symbol, true, limit)
1323 }
1324
1325 pub fn find_callers_with_options(
1326 &self,
1327 symbol: &str,
1328 limit: u32,
1329 options: &crate::query::graph::GraphTraversalOptions,
1330 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1331 let options = self.graph_options_with_logical_group(options)?;
1332 crate::query::graph::traverse_with_options(
1333 self.storage.connection(),
1334 symbol,
1335 true,
1336 limit,
1337 &options,
1338 )
1339 }
1340
1341 pub fn trace_callees(
1342 &self,
1343 symbol: &str,
1344 limit: u32,
1345 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1346 crate::query::graph::traverse(self.storage.connection(), symbol, false, limit)
1347 }
1348
1349 pub fn trace_callees_with_options(
1350 &self,
1351 symbol: &str,
1352 limit: u32,
1353 options: &crate::query::graph::GraphTraversalOptions,
1354 ) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
1355 let options = self.graph_options_with_logical_group(options)?;
1356 crate::query::graph::traverse_with_options(
1357 self.storage.connection(),
1358 symbol,
1359 false,
1360 limit,
1361 &options,
1362 )
1363 }
1364
1365 pub fn graph_traversal_report(
1366 &self,
1367 tool: &str,
1368 symbol: &crate::query::symbol::SymbolHit,
1369 reverse: bool,
1370 limit: u32,
1371 options: &crate::query::graph::GraphTraversalOptions,
1372 ) -> anyhow::Result<crate::query::graph::GraphTraversalReport> {
1373 let options = self.graph_options_with_logical_group(options)?;
1374 let results = crate::query::graph::traverse_with_options(
1375 self.storage.connection(),
1376 &symbol.qualified_name,
1377 reverse,
1378 limit,
1379 &options,
1380 )?;
1381 let summary = crate::query::graph::traversal_summary(
1382 self.storage.connection(),
1383 &symbol.qualified_name,
1384 reverse,
1385 limit,
1386 &options,
1387 results.len(),
1388 )?;
1389 let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1390 let mut paths = BTreeSet::new();
1391 paths.insert(symbol.path.clone());
1392 for result in &results {
1393 if let Some(callsite) = &result.callsite {
1394 paths.insert(callsite.path.clone());
1395 }
1396 }
1397 let mut coverage = self.graph_coverage(paths)?;
1398 if summary.unresolved > 0 {
1399 coverage.known_index_gaps.push(format!(
1400 "{} unresolved qualified callsites match the requested final segment but are not verified to this symbol",
1401 summary.unresolved
1402 ));
1403 }
1404 Ok(crate::query::graph::GraphTraversalReport {
1405 query: crate::query::graph::GraphTraversalQuery {
1406 tool: tool.to_string(),
1407 symbol_id: Some(symbol.symbol_id),
1408 logical_symbol_id: options.logical_symbol_id,
1409 symbol_path: symbol.qualified_name.clone(),
1410 resolution: options.resolution_mode.as_str().to_string(),
1411 },
1412 logical_symbol,
1413 variants,
1414 summary,
1415 coverage,
1416 results,
1417 })
1418 }
1419
1420 pub fn compare_graph_to_text(
1421 &self,
1422 symbol: &crate::query::symbol::SymbolHit,
1423 pattern: &str,
1424 limit: u32,
1425 options: &crate::query::graph::GraphTraversalOptions,
1426 include_tests: bool,
1427 ) -> anyhow::Result<crate::query::graph::CompareGraphTextReport> {
1428 let regex = Regex::new(pattern)?;
1429 let options = self.graph_options_with_logical_group(options)?;
1430 let mut graph_edges = crate::query::graph::traverse_with_options(
1431 self.storage.connection(),
1432 &symbol.qualified_name,
1433 true,
1434 limit,
1435 &options,
1436 )?;
1437 if !include_tests {
1438 graph_edges.retain(|edge| {
1439 edge.callsite.as_ref().is_none_or(|callsite| !is_test_like_path(&callsite.path))
1440 });
1441 }
1442 let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
1443 let text_hits = self.regex_hits(pattern, ®ex, include_tests)?;
1444 let text_by_location = text_hits
1445 .iter()
1446 .map(|hit| ((hit.path.clone(), hit.line), hit))
1447 .collect::<BTreeMap<_, _>>();
1448 let graph_by_location = graph_edges
1449 .iter()
1450 .filter_map(|edge| {
1451 edge.callsite
1452 .as_ref()
1453 .map(|callsite| ((callsite.path.clone(), callsite.line), edge))
1454 })
1455 .collect::<BTreeMap<_, _>>();
1456
1457 let mut paths = BTreeSet::new();
1458 paths.insert(symbol.path.clone());
1459 for hit in &text_hits {
1460 paths.insert(hit.path.clone());
1461 }
1462 for edge in &graph_edges {
1463 if let Some(callsite) = &edge.callsite {
1464 paths.insert(callsite.path.clone());
1465 }
1466 }
1467
1468 let parser_failure_paths = self
1469 .parser_failure_paths()?
1470 .into_iter()
1471 .map(|failure| failure.path)
1472 .collect::<BTreeSet<_>>();
1473 let mut matched_hits = Vec::new();
1474 let mut text_only_hits = Vec::new();
1475 let mut likely_parser_gaps = Vec::new();
1476 for hit in &text_hits {
1477 if let Some(edge) = graph_by_location.get(&(hit.path.clone(), hit.line)) {
1478 matched_hits.push(crate::query::graph::MatchedGraphTextHit {
1479 path: hit.path.clone(),
1480 line: hit.line,
1481 text: hit.text.clone(),
1482 target: edge.target.clone(),
1483 edge_kind: edge.edge_kind.clone(),
1484 confidence: edge.confidence.clone(),
1485 resolution: edge.resolution.clone(),
1486 });
1487 } else {
1488 let gap_kind = classify_text_only_hit(&hit.path, &hit.text, &parser_failure_paths);
1489 let text_only_hit = crate::query::graph::TextOnlyHit {
1490 path: hit.path.clone(),
1491 line: hit.line,
1492 text: hit.text.clone(),
1493 reason: if gap_kind == "parser_call_extraction" || gap_kind == "parser_failure"
1494 {
1495 "no graph edge extracted"
1496 } else {
1497 "text mention outside graph-call evidence"
1498 }
1499 .to_string(),
1500 likely_gap: gap_kind.to_string(),
1501 };
1502 if is_likely_parser_gap_kind(gap_kind) {
1503 likely_parser_gaps.push(text_only_hit.clone());
1504 }
1505 text_only_hits.push(text_only_hit);
1506 }
1507 }
1508
1509 let mut graph_only_edges = Vec::new();
1510 let mut likely_false_positives = Vec::new();
1511 for edge in &graph_edges {
1512 let Some(callsite) = &edge.callsite else {
1513 continue;
1514 };
1515 if text_by_location.contains_key(&(callsite.path.clone(), callsite.line)) {
1516 continue;
1517 }
1518 let current_line = self.current_line_text(&callsite.path, callsite.line)?;
1519 let graph_only = crate::query::graph::GraphOnlyEdge {
1520 path: callsite.path.clone(),
1521 line: callsite.line,
1522 target: edge.target.clone(),
1523 edge_kind: edge.edge_kind.clone(),
1524 confidence: edge.confidence.clone(),
1525 resolution: edge.resolution.clone(),
1526 evidence: edge.evidence.clone(),
1527 reason: "graph edge exists but pattern did not match text".to_string(),
1528 likely_reason: graph_only_reason(edge, current_line.as_deref()),
1529 };
1530 if is_likely_false_positive_graph_only(edge, &graph_only) {
1531 likely_false_positives.push(graph_only.clone());
1532 }
1533 graph_only_edges.push(graph_only);
1534 }
1535 let complete = likely_parser_gaps.is_empty() && likely_false_positives.is_empty();
1536 let recommended_fallback =
1537 recommended_graph_text_fallback(&likely_parser_gaps, &graph_only_edges);
1538 let pattern_match_mode = compare_pattern_match_mode(pattern, &symbol.name);
1539 let mut warnings = Vec::new();
1540 if pattern_match_mode == "substring_identifier" {
1541 warnings.push(format!(
1542 "pattern may match identifiers that merely contain `{}`; use an identifier boundary or escaped call suffix for exact text auditing",
1543 symbol.name
1544 ));
1545 }
1546
1547 Ok(crate::query::graph::CompareGraphTextReport {
1548 query: crate::query::graph::CompareGraphTextQuery {
1549 symbol_id: Some(symbol.symbol_id),
1550 logical_symbol_id: options.logical_symbol_id,
1551 symbol_path: symbol.qualified_name.clone(),
1552 pattern: pattern.to_string(),
1553 resolution: options.resolution_mode.as_str().to_string(),
1554 include_tests,
1555 },
1556 logical_symbol,
1557 variants,
1558 summary: crate::query::graph::CompareGraphTextSummary {
1559 graph_hits: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1560 graph_edges: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
1561 text_hits: u64::try_from(text_hits.len()).unwrap_or(u64::MAX),
1562 matched: u64::try_from(matched_hits.len()).unwrap_or(u64::MAX),
1563 graph_only: u64::try_from(graph_only_edges.len()).unwrap_or(u64::MAX),
1564 text_only: u64::try_from(text_only_hits.len()).unwrap_or(u64::MAX),
1565 text_mentions: u64::try_from(text_only_hits.len() - likely_parser_gaps.len())
1566 .unwrap_or(u64::MAX),
1567 likely_parser_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1568 likely_false_positives: u64::try_from(likely_false_positives.len())
1569 .unwrap_or(u64::MAX),
1570 likely_index_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
1571 complete,
1572 recommended_fallback,
1573 pattern_match_mode,
1574 warnings,
1575 },
1576 coverage: self.graph_coverage(paths)?,
1577 matched_hits,
1578 text_only_hits,
1579 graph_only_edges,
1580 likely_parser_gaps,
1581 likely_false_positives,
1582 })
1583 }
1584
1585 fn graph_logical_symbol(
1586 &self,
1587 logical_symbol_id: Option<i64>,
1588 ) -> anyhow::Result<(
1589 Option<crate::query::graph::LogicalSymbol>,
1590 Vec<crate::query::graph::LogicalSymbolVariant>,
1591 )> {
1592 let Some(logical_symbol_id) = logical_symbol_id else {
1593 return Ok((None, Vec::new()));
1594 };
1595 let Some(logical) = crate::query::symbol::lookup_logical_by_id(
1596 self.storage.connection(),
1597 logical_symbol_id,
1598 )?
1599 else {
1600 return Ok((None, Vec::new()));
1601 };
1602 let variants = crate::query::symbol::logical_members(
1603 self.storage.connection(),
1604 logical.logical_symbol_id,
1605 )?
1606 .into_iter()
1607 .map(|member| crate::query::graph::LogicalSymbolVariant {
1608 symbol_id: member.symbol_id,
1609 cfg_expr: member.cfg_expr,
1610 signature_hash: member.signature_hash,
1611 start_line: member.start_line,
1612 end_line: member.end_line,
1613 })
1614 .collect::<Vec<_>>();
1615 Ok((
1616 Some(crate::query::graph::LogicalSymbol {
1617 logical_symbol_id: logical.logical_symbol_id,
1618 qualified_name: logical.qualified_name,
1619 variant_count: logical.variant_count,
1620 group_reason: logical.group_reason,
1621 }),
1622 variants,
1623 ))
1624 }
1625
1626 fn graph_options_with_logical_group(
1627 &self,
1628 options: &crate::query::graph::GraphTraversalOptions,
1629 ) -> anyhow::Result<crate::query::graph::GraphTraversalOptions> {
1630 if options.logical_symbol_id.is_some() {
1631 return Ok(options.clone());
1632 }
1633 let Some(symbol_id) = options.symbol_id else {
1634 return Ok(options.clone());
1635 };
1636 let Some(logical) =
1637 crate::query::symbol::logical_for_symbol_id(self.storage.connection(), symbol_id)?
1638 else {
1639 return Ok(options.clone());
1640 };
1641 let mut options = options.clone();
1642 options.logical_symbol_id = Some(logical.logical_symbol_id);
1643 Ok(options)
1644 }
1645
1646 fn local_symbol_context_hits(
1647 &self,
1648 symbol: &crate::query::symbol::SymbolHit,
1649 limit: u32,
1650 ) -> anyhow::Result<Vec<SearchHit>> {
1651 let mut stmt = self.storage.connection().prepare(
1652 "
1653 SELECT chunks.id, files.path, files.language, files.kind,
1654 chunks.start_line, chunks.end_line, chunks.symbol_path, chunks.text
1655 FROM chunks
1656 JOIN files ON files.id = chunks.file_id
1657 WHERE files.path = ?1
1658 AND (
1659 chunks.symbol_path = ?2
1660 OR chunks.symbol_path LIKE ?3
1661 OR chunks.text LIKE ?4
1662 )
1663 ORDER BY
1664 CASE
1665 WHEN chunks.symbol_path = ?2 THEN 0
1666 WHEN chunks.symbol_path LIKE ?3 THEN 1
1667 ELSE 2
1668 END,
1669 chunks.start_line
1670 LIMIT ?5
1671 ",
1672 )?;
1673 let rows = stmt.query_map(
1674 params![
1675 symbol.path,
1676 symbol.qualified_name,
1677 format!("%{}%", symbol.name),
1678 format!("%{}%", symbol.name),
1679 i64::from(limit.max(1)),
1680 ],
1681 |row| {
1682 let text: String = row.get(7)?;
1683 Ok(SearchHit {
1684 chunk_id: row.get(0)?,
1685 path: row.get(1)?,
1686 language: row.get(2)?,
1687 kind: row.get(3)?,
1688 start_line: row.get(4)?,
1689 end_line: row.get(5)?,
1690 symbol_path: row.get(6)?,
1691 score: 1.0,
1692 summary: bounded_summary(&text),
1693 graph: None,
1694 score_components: None,
1695 })
1696 },
1697 )?;
1698 let mut hits = Vec::new();
1699 for row in rows {
1700 hits.push(row?);
1701 }
1702 Ok(hits)
1703 }
1704
1705 pub fn impact_surface(
1706 &self,
1707 query: &str,
1708 limit: u32,
1709 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1710 crate::query::impact::impact_surface(self.storage.connection(), query, limit)
1711 }
1712
1713 pub fn impact_surface_with_options(
1714 &self,
1715 query: &str,
1716 limit: u32,
1717 resolution_mode: crate::query::graph::GraphResolutionMode,
1718 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1719 crate::query::impact::impact_surface_with_options(
1720 self.storage.connection(),
1721 query,
1722 limit,
1723 resolution_mode,
1724 )
1725 }
1726
1727 pub fn impact_surface_for_selected_symbol(
1728 &self,
1729 symbol: &crate::query::symbol::SymbolHit,
1730 limit: u32,
1731 resolution_mode: crate::query::graph::GraphResolutionMode,
1732 ) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
1733 crate::query::impact::impact_surface_for_symbol(
1734 self.storage.connection(),
1735 symbol,
1736 limit,
1737 resolution_mode,
1738 )
1739 }
1740
1741 pub fn impact_surface_report_for_selected_symbol(
1742 &self,
1743 symbol: &crate::query::symbol::SymbolHit,
1744 limit: u32,
1745 options: &crate::query::impact::ImpactSurfaceOptions,
1746 ) -> anyhow::Result<crate::query::impact::ImpactSurfaceReport> {
1747 crate::query::impact::impact_surface_report_for_symbol(
1748 self.storage.connection(),
1749 symbol,
1750 limit,
1751 options,
1752 )
1753 }
1754
1755 pub fn repo_brief(
1756 &self,
1757 options: crate::query::repo_brief::RepoBriefOptions,
1758 ) -> anyhow::Result<crate::query::repo_brief::RepoBrief> {
1759 crate::query::repo_brief::repo_brief(self.storage.connection(), options)
1760 }
1761
1762 pub fn repo_clusters(
1763 &self,
1764 options: crate::query::clusters::RepoClustersOptions,
1765 ) -> anyhow::Result<crate::query::clusters::RepoClustersReport> {
1766 crate::query::clusters::repo_clusters(self.storage.connection(), options)
1767 }
1768
1769 pub fn memory_create(
1770 &self,
1771 request: crate::query::memory::RepoMemoryCreate,
1772 ) -> anyhow::Result<crate::query::memory::RepoMemoryCreateResult> {
1773 crate::query::memory::create_memory(self.storage.connection(), request)
1774 }
1775
1776 pub fn memory_update(
1777 &self,
1778 update: crate::query::memory::RepoMemoryUpdate,
1779 ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1780 crate::query::memory::update_memory(self.storage.connection(), update)
1781 }
1782
1783 pub fn memory_mark_obsolete(
1784 &self,
1785 memory_id: &str,
1786 ) -> anyhow::Result<crate::query::memory::RepoMemory> {
1787 crate::query::memory::mark_obsolete(self.storage.connection(), memory_id)
1788 }
1789
1790 pub fn memory_search(
1791 &self,
1792 query: &str,
1793 limit: u32,
1794 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1795 crate::query::memory::memory_search(self.storage.connection(), query, limit)
1796 }
1797
1798 pub fn memory_for_symbol(
1799 &self,
1800 symbol: &crate::query::symbol::SymbolHit,
1801 limit: u32,
1802 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1803 crate::query::memory::memories_for_symbol(self.storage.connection(), symbol, limit)
1804 }
1805
1806 pub fn memory_for_path(
1807 &self,
1808 path: &str,
1809 limit: u32,
1810 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1811 crate::query::memory::memories_for_path(self.storage.connection(), path, limit)
1812 }
1813
1814 pub fn memory_for_edges(
1815 &self,
1816 edge_ids: &[i64],
1817 limit: u32,
1818 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1819 crate::query::memory::memories_for_edges(self.storage.connection(), edge_ids, limit)
1820 }
1821
1822 pub fn memory_evidence_for_symbol_and_edges(
1823 &self,
1824 symbol: &crate::query::symbol::SymbolHit,
1825 edge_ids: &[i64],
1826 limit: u32,
1827 ) -> anyhow::Result<crate::query::memory::RepoMemoryEvidence> {
1828 crate::query::memory::memory_evidence_for_symbol_and_edges(
1829 self.storage.connection(),
1830 symbol,
1831 edge_ids,
1832 limit,
1833 )
1834 }
1835
1836 pub fn memory_for_call_path_hash(
1837 &self,
1838 edge_sequence_hash: &str,
1839 limit: u32,
1840 ) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
1841 crate::query::memory::memories_for_call_path_hash(
1842 self.storage.connection(),
1843 edge_sequence_hash,
1844 limit,
1845 )
1846 }
1847
1848 pub fn memory_validate(
1849 &self,
1850 ) -> anyhow::Result<crate::query::memory::RepoMemoryValidationReport> {
1851 crate::query::memory::validate_memories(self.storage.connection())
1852 }
1853
1854 pub fn rebuild_fts(&self) -> anyhow::Result<()> {
1855 schema::rebuild_fts(self.storage.connection())?;
1856 self.record_content_revision()?;
1857 self.record_fts_current()?;
1858 self.set_meta("fts_dirty", "false")?;
1859 Ok(())
1860 }
1861
1862 pub fn sync_fts(&self) -> anyhow::Result<()> {
1863 self.record_content_revision()?;
1864 self.record_fts_current()?;
1865 self.set_meta("fts_dirty", "false")?;
1866 Ok(())
1867 }
1868
1869 fn record_fts_current(&self) -> anyhow::Result<()> {
1870 self.set_meta("fts_synced_at_ms", &now_ms().to_string())?;
1871 let revision = self.content_revision()?;
1872 self.set_meta("fts_source_revision", &revision)?;
1873 Ok(())
1874 }
1875
1876 fn record_content_revision(&self) -> anyhow::Result<String> {
1877 let revision = self.content_revision()?;
1878 self.set_meta("content_revision", &revision)?;
1879 Ok(revision)
1880 }
1881
1882 pub fn heal_file(&self, path: &Path) -> anyhow::Result<()> {
1883 let Some(root) = self.storage.source_root() else {
1884 anyhow::bail!("index has no source_root metadata; rebuild required");
1885 };
1886 let row = self.file_row(path)?;
1887 let full_path = root.join(path);
1888 let text = fs::read_to_string(&full_path)?;
1889
1890 let changes = git_changed_paths(root).unwrap_or_default();
1891 let is_dirty = changes.changed.contains(path);
1892 let has_base_commit = !self.active_commit_sha.is_empty();
1893 let scope = if !has_base_commit || is_dirty {
1894 FileScope::worktree(self.active_worktree_id.clone())
1895 } else {
1896 FileScope::commit(self.active_commit_sha.clone())
1897 };
1898 self.remove_file_in_scope(path, &scope.commit_sha, &scope.worktree_id)?;
1899
1900 self.index_file(
1901 path,
1902 row.language,
1903 row.kind,
1904 file_metadata_ms(&full_path)?,
1905 &text,
1906 &scope,
1907 )?;
1908 self.rebuild_logical_symbols()?;
1909 self.resolve_edges()
1910 }
1911
1912 fn index_file(
1913 &self,
1914 path: &Path,
1915 language: Language,
1916 kind: TargetKind,
1917 modified_at_ms: i64,
1918 text: &str,
1919 scope: &FileScope,
1920 ) -> anyhow::Result<()> {
1921 if language != Language::Markdown && kind != TargetKind::Generated {
1922 if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
1923 } else if let Some(message) = parser::parse_error(path, language, text)
1926 .unwrap_or_else(|err| Some(err.to_string()))
1927 {
1928 self.insert_parser_failure(path, language, &message)?;
1929 }
1930 }
1931 let sha256 = hex_sha256(text.as_bytes());
1932 let file_id = self.storage.connection().query_row(
1933 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
1934 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
1935 RETURNING id",
1936 params![
1937 path_string(path),
1938 language.as_str(),
1939 kind.as_str(),
1940 sha256,
1941 modified_at_ms,
1942 matches!(kind, TargetKind::Generated),
1943 now_ms(),
1944 sha256,
1945 &scope.commit_sha,
1946 &scope.worktree_id,
1947 ],
1948 |row| row.get::<_, i64>(0),
1949 )?;
1950 let chunks = if kind == TargetKind::Generated {
1951 chunker::generated_chunks_for_file(path, text)
1952 } else {
1953 chunker::chunks_for_file(path, language, text)
1954 };
1955 let symbols =
1956 if kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
1957 Vec::new()
1958 } else {
1959 symbols::symbols_for_file(path, language, text)
1960 };
1961 self.insert_chunks(file_id, &sha256, &chunks, text)?;
1962 self.insert_symbols(file_id, language, &symbols)?;
1963 if kind != TargetKind::Generated && text.len() <= edges::MAX_GRAPH_PARSE_BYTES {
1964 edges::index_file_edges(self.storage.connection(), file_id, path, language, text)?;
1965 }
1966 self.mark_fts_dirty()?;
1967 Ok(())
1968 }
1969
1970 fn insert_prepared_file(&self, prepared_file: &PreparedIndexFile) -> anyhow::Result<()> {
1971 let file = &prepared_file.file;
1972 let prepared = match &prepared_file.prepared {
1973 Ok(prepared) => prepared,
1974 Err(err) => {
1975 self.insert_parser_failure(&file.relative_path, file.language, &err.to_string())?;
1976 return Ok(());
1977 },
1978 };
1979 if let Some(message) = &prepared.parser_failure {
1980 self.insert_parser_failure(&file.relative_path, file.language, message)?;
1981 }
1982 let file_id = self.storage.connection().query_row(
1983 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
1984 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)
1985 RETURNING id",
1986 params![
1987 path_string(&file.relative_path),
1988 file.language.as_str(),
1989 file.kind.as_str(),
1990 prepared.sha256,
1991 prepared.modified_at_ms,
1992 matches!(file.kind, TargetKind::Generated),
1993 now_ms(),
1994 prepared.sha256,
1995 file.commit_sha,
1996 file.worktree_id,
1997 ],
1998 |row| row.get::<_, i64>(0),
1999 )?;
2000 self.insert_chunks(file_id, &prepared.sha256, &prepared.chunks, &prepared.text)?;
2001 self.insert_symbols(file_id, file.language, &prepared.symbols)?;
2002 if file.kind != TargetKind::Generated && prepared.text.len() <= edges::MAX_GRAPH_PARSE_BYTES
2003 {
2004 edges::index_file_edges(
2005 self.storage.connection(),
2006 file_id,
2007 &file.relative_path,
2008 file.language,
2009 &prepared.text,
2010 )?;
2011 }
2012 self.mark_fts_dirty()?;
2013 Ok(())
2014 }
2015
2016 fn insert_chunks(
2017 &self,
2018 file_id: i64,
2019 source_revision: &str,
2020 chunks: &[Chunk],
2021 full_text: &str,
2022 ) -> anyhow::Result<()> {
2023 let (path, language, kind) = self.storage.connection().query_row(
2024 "SELECT path, language, kind FROM main.files WHERE id = ?1",
2025 [file_id],
2026 |row| {
2027 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?, row.get::<_, String>(2)?))
2028 },
2029 )?;
2030 for chunk in chunks {
2031 let anchor =
2032 anchors::anchor_for_text(&chunk.text, chunk.start_line, chunk.end_line, full_text);
2033 let embedding_policy = ai::embedding_policy_for_chunk(
2034 Path::new(&path),
2035 &language,
2036 &kind,
2037 chunk.kind,
2038 chunk.symbol_path.as_deref(),
2039 &chunk.text,
2040 ai::DEFAULT_MAX_EMBEDDING_CHARS,
2041 );
2042 self.storage.connection().execute(
2043 "INSERT INTO chunks(file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line, text, text_hash,
2044 source_revision, anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2045 start_context_hash, end_context_hash, context_radius, embedding_policy, embedding_priority)
2046 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19)",
2047 params![
2048 file_id,
2049 chunk.kind,
2050 chunk.symbol_path,
2051 i64::try_from(chunk.start_byte)?,
2052 i64::try_from(chunk.end_byte)?,
2053 i64::try_from(chunk.start_line)?,
2054 i64::try_from(chunk.end_line)?,
2055 chunk.text,
2056 hex_sha256(chunk.text.as_bytes()),
2057 source_revision,
2058 anchor.version,
2059 anchor.normalized_hash,
2060 anchor.start_boundary_hash,
2061 anchor.end_boundary_hash,
2062 anchor.start_context_hash,
2063 anchor.end_context_hash,
2064 anchor.context_radius,
2065 embedding_policy.policy,
2066 embedding_policy.priority,
2067 ],
2068 )?;
2069 let chunk_id = self.storage.connection().last_insert_rowid();
2070 self.storage.connection().execute(
2071 "INSERT INTO chunk_fts(rowid, text) VALUES (?1, ?2)",
2072 params![chunk_id, chunk.text],
2073 )?;
2074 }
2075 Ok(())
2076 }
2077
2078 fn insert_symbols(
2079 &self,
2080 file_id: i64,
2081 language: Language,
2082 symbols: &[Symbol],
2083 ) -> anyhow::Result<()> {
2084 for symbol in symbols {
2085 self.storage.connection().execute(
2086 "INSERT INTO symbols(file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs)
2087 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
2088 params![
2089 file_id,
2090 language.as_str(),
2091 symbol.name,
2092 symbol.qualified_name,
2093 symbol.kind,
2094 i64::try_from(symbol.start_byte)?,
2095 i64::try_from(symbol.end_byte)?,
2096 symbol.signature,
2097 symbol.docs,
2098 ],
2099 )?;
2100 let symbol_id = self.storage.connection().last_insert_rowid();
2101 for fact in &symbol.facts {
2102 self.storage.connection().execute(
2103 "INSERT OR IGNORE INTO symbol_facts(symbol_id, fact_kind, fact_value)
2104 VALUES (?1, ?2, ?3)",
2105 params![symbol_id, fact.kind, fact.value],
2106 )?;
2107 }
2108 }
2109 Ok(())
2110 }
2111
2112 fn write_git_meta(&self, root: &Path) -> anyhow::Result<()> {
2113 self.set_meta("git_commit", &git_output(root, &["rev-parse", "HEAD"]).unwrap_or_default())?;
2114 let dirty = !git_output(root, &["status", "--porcelain"]).unwrap_or_default().is_empty();
2115 self.set_meta("git_dirty", if dirty { "true" } else { "false" })?;
2116 Ok(())
2117 }
2118
2119 fn apply_prepared_git_history(
2120 &self,
2121 root: &Path,
2122 handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
2123 ) -> anyhow::Result<GitHistoryIndexStatus> {
2124 let prepared = join_git_history_prepare(handle)?;
2125 git_history::apply_prepared(self.storage.connection(), root, prepared)
2126 }
2127
2128 fn git_history_status(&self) -> anyhow::Result<GitHistoryIndexStatus> {
2129 let Some(root) = self.storage.source_root() else {
2130 return git_history::status(self.storage.connection(), Path::new("."));
2131 };
2132 git_history::status(self.storage.connection(), root)
2133 }
2134
2135 fn github_status(&self) -> anyhow::Result<GitHubStatus> {
2136 github::status(self.storage.connection())
2137 }
2138
2139 fn mark_fts_dirty(&self) -> anyhow::Result<()> {
2140 self.set_meta("fts_dirty", "true")
2141 }
2142
2143 fn resolve_edges(&self) -> anyhow::Result<()> {
2144 edges::resolve_all_edges(self.storage.connection())
2145 }
2146
2147 fn rebuild_logical_symbols(&self) -> anyhow::Result<()> {
2148 self.storage.connection().execute_batch(
2149 "
2150 CREATE TEMP TABLE IF NOT EXISTS logical_symbols_to_rebuild(id INTEGER PRIMARY KEY);
2151 DELETE FROM temp.logical_symbols_to_rebuild;
2152 INSERT OR IGNORE INTO temp.logical_symbols_to_rebuild(id)
2153 SELECT logical_symbol_members.logical_symbol_id
2154 FROM main.logical_symbol_members
2155 JOIN main.symbols ON symbols.id = logical_symbol_members.symbol_id
2156 JOIN files ON files.id = symbols.file_id;
2157 DELETE FROM main.logical_symbol_members
2158 WHERE logical_symbol_id IN (
2159 SELECT id FROM temp.logical_symbols_to_rebuild
2160 );
2161 DELETE FROM main.logical_symbols
2162 WHERE id IN (
2163 SELECT id FROM temp.logical_symbols_to_rebuild
2164 );
2165 DELETE FROM temp.logical_symbols_to_rebuild;
2166 ",
2167 )?;
2168
2169 let mut stmt = self.storage.connection().prepare(
2170 "
2171 SELECT symbols.id, symbols.file_id, files.path, symbols.language, symbols.name,
2172 symbols.qualified_name, symbols.kind, symbols.start_byte, symbols.end_byte,
2173 symbols.signature,
2174 COALESCE((
2175 SELECT chunks.start_byte
2176 FROM chunks
2177 WHERE chunks.file_id = symbols.file_id
2178 AND symbols.start_byte >= chunks.start_byte
2179 AND symbols.start_byte < chunks.end_byte
2180 ORDER BY chunks.end_byte - chunks.start_byte ASC
2181 LIMIT 1
2182 ), symbols.start_byte) AS chunk_start_byte,
2183 COALESCE((
2184 SELECT chunks.start_line
2185 FROM chunks
2186 WHERE chunks.file_id = symbols.file_id
2187 AND symbols.start_byte >= chunks.start_byte
2188 AND symbols.start_byte < chunks.end_byte
2189 ORDER BY chunks.end_byte - chunks.start_byte ASC
2190 LIMIT 1
2191 ), 1) AS chunk_start_line,
2192 COALESCE((
2193 SELECT chunks.text
2194 FROM chunks
2195 WHERE chunks.file_id = symbols.file_id
2196 AND symbols.start_byte >= chunks.start_byte
2197 AND symbols.start_byte < chunks.end_byte
2198 ORDER BY chunks.end_byte - chunks.start_byte ASC
2199 LIMIT 1
2200 ), '') AS chunk_text
2201 FROM symbols
2202 JOIN files ON files.id = symbols.file_id
2203 ORDER BY files.path, symbols.language, symbols.qualified_name, symbols.kind,
2204 symbols.start_byte, symbols.end_byte
2205 ",
2206 )?;
2207 let rows = stmt.query_map([], |row| {
2208 let start_byte = usize::try_from(row.get::<_, i64>(7)?).unwrap_or(0);
2209 let end_byte = usize::try_from(row.get::<_, i64>(8)?).unwrap_or(0);
2210 let chunk_start_byte = usize::try_from(row.get::<_, i64>(10)?).unwrap_or(start_byte);
2211 let chunk_start_line = row.get::<_, i64>(11)?;
2212 let chunk_text: String = row.get(12)?;
2213 let start_line =
2214 symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, start_byte);
2215 let end_line =
2216 symbol_line_for_byte(&chunk_text, chunk_start_byte, chunk_start_line, end_byte);
2217 Ok(LogicalSymbolMemberRow {
2218 symbol_id: row.get(0)?,
2219 path: row.get(2)?,
2220 language: row.get(3)?,
2221 name: row.get(4)?,
2222 qualified_name: row.get(5)?,
2223 kind: row.get(6)?,
2224 signature: row.get(9)?,
2225 start_line,
2226 end_line,
2227 })
2228 })?;
2229 let mut groups: BTreeMap<LogicalSymbolKey, Vec<LogicalSymbolMemberRow>> = BTreeMap::new();
2230 for row in rows {
2231 let row = row?;
2232 groups.entry(LogicalSymbolKey::from(&row)).or_default().push(row);
2233 }
2234 for (key, members) in groups {
2235 let group_reason = if members.len() > 1 { "cfg_variant" } else { "single" };
2236 self.storage.connection().execute(
2237 "
2238 INSERT INTO logical_symbols(language, path, logical_name, qualified_name, kind, variant_count, group_reason)
2239 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)
2240 ",
2241 params![
2242 key.language,
2243 key.path,
2244 key.name,
2245 key.qualified_name,
2246 key.kind,
2247 i64::try_from(members.len()).unwrap_or(i64::MAX),
2248 group_reason,
2249 ],
2250 )?;
2251 let logical_symbol_id = self.storage.connection().last_insert_rowid();
2252 for member in members {
2253 let signature_hash =
2254 member.signature.as_deref().map(|signature| hex_sha256(signature.as_bytes()));
2255 self.storage.connection().execute(
2256 "
2257 INSERT INTO logical_symbol_members(
2258 logical_symbol_id, symbol_id, cfg_expr, signature_hash, start_line, end_line
2259 )
2260 VALUES (?1, ?2, NULL, ?3, ?4, ?5)
2261 ",
2262 params![
2263 logical_symbol_id,
2264 member.symbol_id,
2265 signature_hash,
2266 member.start_line,
2267 member.end_line,
2268 ],
2269 )?;
2270 }
2271 }
2272 Ok(())
2273 }
2274
2275 fn graph_coverage(
2276 &self,
2277 paths: BTreeSet<String>,
2278 ) -> anyhow::Result<crate::query::graph::GraphCoverage> {
2279 let indexed_files =
2280 self.storage
2281 .connection()
2282 .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2283 let parser_failure_paths = self.parser_failure_paths()?;
2284 let parser_failures = u64::try_from(parser_failure_paths.len()).unwrap_or(0);
2285 let known_index_gaps = parser_failure_paths
2286 .iter()
2287 .map(|failure| {
2288 format!(
2289 "{} parser failed for {}: {}",
2290 failure.language, failure.path, failure.message
2291 )
2292 })
2293 .collect::<Vec<_>>();
2294 let mut stale_files = 0_u64;
2295 let mut parser_coverage_for_paths = Vec::new();
2296 for path in paths {
2297 let Some(row) = self.graph_path_row(&path)? else {
2298 parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2299 path,
2300 language: "unknown".to_string(),
2301 parser_status: "missing_from_index".to_string(),
2302 graph_status: "missing_from_index".to_string(),
2303 last_indexed_revision: None,
2304 });
2305 continue;
2306 };
2307 let stale = self.source_path_is_stale(&path, &row.sha256);
2308 if stale {
2309 stale_files += 1;
2310 }
2311 let parser_failed = parser_failure_paths.iter().any(|failure| failure.path == path);
2312 parser_coverage_for_paths.push(crate::query::graph::GraphPathCoverage {
2313 path,
2314 language: row.language,
2315 parser_status: if parser_failed { "failed" } else { "ok" }.to_string(),
2316 graph_status: if stale {
2317 "stale_source"
2318 } else if parser_failed {
2319 "parser_failed"
2320 } else {
2321 "ok"
2322 }
2323 .to_string(),
2324 last_indexed_revision: (!row.indexed_revision.is_empty())
2325 .then_some(row.indexed_revision),
2326 });
2327 }
2328 Ok(crate::query::graph::GraphCoverage {
2329 indexed_files: u64::try_from(indexed_files).unwrap_or(0),
2330 parser_failures,
2331 stale_files,
2332 known_index_gaps,
2333 parser_coverage_for_paths,
2334 })
2335 }
2336
2337 fn graph_path_row(&self, path: &str) -> anyhow::Result<Option<GraphPathRow>> {
2338 self.storage
2339 .connection()
2340 .query_row(
2341 "SELECT language, sha256, indexed_revision FROM files WHERE path = ?1",
2342 [path],
2343 |row| {
2344 Ok(GraphPathRow {
2345 language: row.get(0)?,
2346 sha256: row.get(1)?,
2347 indexed_revision: row.get(2)?,
2348 })
2349 },
2350 )
2351 .optional()
2352 .map_err(Into::into)
2353 }
2354
2355 fn source_path_is_stale(&self, path: &str, indexed_sha256: &str) -> bool {
2356 let Some(root) = self.storage.source_root() else {
2357 return false;
2358 };
2359 let Ok(bytes) = fs::read(root.join(path)) else {
2360 return true;
2361 };
2362 hex_sha256(&bytes) != indexed_sha256
2363 }
2364
2365 fn regex_hits(
2366 &self,
2367 pattern: &str,
2368 regex: &Regex,
2369 include_tests: bool,
2370 ) -> anyhow::Result<Vec<crate::query::graph::TextOnlyHit>> {
2371 let Some(root) = self.storage.source_root() else {
2372 anyhow::bail!("cannot compare graph to text: source_root is missing from index_meta");
2373 };
2374 let mut stmt = self.storage.connection().prepare("SELECT path FROM files ORDER BY path")?;
2375 let paths =
2376 stmt.query_map([], |row| row.get::<_, String>(0))?.collect::<Result<Vec<_>, _>>()?;
2377 let mut hits = Vec::new();
2378 for path in paths {
2379 if !include_tests && is_test_like_path(&path) {
2380 continue;
2381 }
2382 let full_path = root.join(&path);
2383 let Ok(text) = fs::read_to_string(&full_path) else {
2384 continue;
2385 };
2386 for (index, line) in text.lines().enumerate() {
2387 if regex.is_match(line) {
2388 hits.push(crate::query::graph::TextOnlyHit {
2389 path: path.clone(),
2390 line: i64::try_from(index + 1).unwrap_or(i64::MAX),
2391 text: line.trim().to_string(),
2392 reason: "text pattern matched".to_string(),
2393 likely_gap: pattern.to_string(),
2394 });
2395 }
2396 }
2397 }
2398 Ok(hits)
2399 }
2400
2401 fn current_line_text(&self, path: &str, line: i64) -> anyhow::Result<Option<String>> {
2402 let Some(root) = self.storage.source_root() else {
2403 return Ok(None);
2404 };
2405 let Ok(text) = fs::read_to_string(root.join(path)) else {
2406 return Ok(None);
2407 };
2408 let Some(index) = usize::try_from(line.saturating_sub(1)).ok() else {
2409 return Ok(None);
2410 };
2411 Ok(text.lines().nth(index).map(|line| line.trim().to_string()))
2412 }
2413
2414 fn ensure_graph_index_current(&self) -> anyhow::Result<()> {
2415 if self.meta("graph_index_version")?.as_deref() == Some(GRAPH_INDEX_VERSION) {
2416 return Ok(());
2417 }
2418 let Some(root) = self.storage.source_root().map(Path::to_path_buf) else {
2419 return Ok(());
2420 };
2421 self.storage.execute_batch("BEGIN IMMEDIATE TRANSACTION")?;
2422 let result = (|| -> anyhow::Result<()> {
2423 self.storage.connection().execute("DELETE FROM edges", [])?;
2424 let files = self.graph_reindex_files()?;
2425 for file in files {
2426 if file.kind == TargetKind::Generated || file.language == Language::Markdown {
2427 continue;
2428 }
2429 let full_path = root.join(&file.path);
2430 let Ok(text) = fs::read_to_string(full_path) else {
2431 continue;
2432 };
2433 if text.len() > edges::MAX_GRAPH_PARSE_BYTES {
2434 continue;
2435 }
2436 edges::index_file_edges(
2437 self.storage.connection(),
2438 file.id,
2439 Path::new(&file.path),
2440 file.language,
2441 &text,
2442 )?;
2443 }
2444 self.resolve_edges()?;
2445 self.mark_graph_index_current()?;
2446 Ok(())
2447 })();
2448 if result.is_err() {
2449 let _ = self.storage.execute_batch("ROLLBACK");
2450 }
2451 result?;
2452 self.storage.execute_batch("COMMIT")?;
2453 Ok(())
2454 }
2455
2456 fn mark_graph_index_current(&self) -> anyhow::Result<()> {
2457 self.set_meta("graph_index_version", GRAPH_INDEX_VERSION)
2458 }
2459
2460 fn set_meta(&self, key: &str, value: &str) -> anyhow::Result<()> {
2461 self.storage.connection().execute(
2462 "INSERT INTO index_meta(key, value) VALUES (?1, ?2)
2463 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
2464 params![key, value],
2465 )?;
2466 Ok(())
2467 }
2468
2469 fn meta(&self, key: &str) -> anyhow::Result<Option<String>> {
2470 meta_for(self.storage.connection(), key)
2471 }
2472
2473 fn insert_parser_failure(
2474 &self,
2475 path: &Path,
2476 language: Language,
2477 message: &str,
2478 ) -> anyhow::Result<()> {
2479 self.storage.connection().execute(
2480 "INSERT INTO parser_failures(path, language, message) VALUES (?1, ?2, ?3)",
2481 params![path_string(path), language.as_str(), message],
2482 )?;
2483 Ok(())
2484 }
2485
2486 fn parser_failure_count(&self) -> anyhow::Result<u64> {
2487 let count = self.storage.connection().query_row(
2488 "SELECT COUNT(*) FROM parser_failures",
2489 [],
2490 |row| row.get::<_, i64>(0),
2491 )?;
2492 Ok(u64::try_from(count).unwrap_or(0))
2493 }
2494
2495 fn parser_failure_paths(&self) -> anyhow::Result<Vec<ParserFailure>> {
2496 let mut stmt = self.storage.connection().prepare(
2497 "SELECT path, language, message FROM parser_failures ORDER BY path, language, message",
2498 )?;
2499 let rows = stmt.query_map([], |row| {
2500 Ok(ParserFailure { path: row.get(0)?, language: row.get(1)?, message: row.get(2)? })
2501 })?;
2502 let mut failures = Vec::new();
2503 for row in rows {
2504 failures.push(row?);
2505 }
2506 Ok(failures)
2507 }
2508
2509 fn search_with_heal(
2510 &self,
2511 query: &str,
2512 limit: u32,
2513 include_generated: bool,
2514 allow_heal: bool,
2515 explain: bool,
2516 options: SearchOptions,
2517 ) -> anyhow::Result<Vec<SearchHit>> {
2518 let hits = crate::search::lexical::search_with_options(
2519 self.storage.connection(),
2520 query,
2521 limit,
2522 include_generated,
2523 explain,
2524 options,
2525 )?;
2526 if !allow_heal {
2527 return Ok(hits);
2528 }
2529 let stale = self.stale_hit_paths(&hits)?;
2530 if stale.is_empty() {
2531 return Ok(hits);
2532 }
2533 if stale.len() > MAX_AUTO_HEAL_FILES_PER_CALL {
2534 anyhow::bail!(IndexError::NeedsReindex {
2535 stale_files: stale.len(),
2536 cap: MAX_AUTO_HEAL_FILES_PER_CALL,
2537 });
2538 }
2539 for path in stale {
2540 self.heal_file(Path::new(&path))?;
2541 }
2542 self.sync_fts()?;
2543 self.search_with_heal(query, limit, include_generated, false, explain, options)
2544 }
2545
2546 fn stale_hit_paths(&self, hits: &[SearchHit]) -> anyhow::Result<Vec<String>> {
2547 let Some(root) = self.storage.source_root() else {
2548 return Ok(Vec::new());
2549 };
2550 let mut stale = Vec::new();
2551 let mut seen = BTreeSet::new();
2552 for hit in hits {
2553 if !seen.insert(hit.path.clone()) {
2554 continue;
2555 }
2556 let source_path = root.join(&hit.path);
2557 let Ok(text) = fs::read_to_string(source_path) else {
2558 stale.push(hit.path.clone());
2559 continue;
2560 };
2561 let chunk = crate::query::read_chunk(self.storage.connection(), hit.chunk_id)?;
2562 let Some(chunk) = chunk else {
2563 stale.push(hit.path.clone());
2564 continue;
2565 };
2566 let anchor = self.chunk_anchor(hit.chunk_id)?;
2567 let status = anchors::validate(
2568 &chunk.text,
2569 usize::try_from(chunk.start_line).unwrap_or(1),
2570 usize::try_from(chunk.end_line).unwrap_or(1),
2571 &anchor,
2572 &text,
2573 );
2574 if !matches!(status, AnchorStatus::Exact) {
2575 stale.push(hit.path.clone());
2576 }
2577 }
2578 Ok(stale)
2579 }
2580
2581 fn chunk_anchor(&self, chunk_id: i64) -> anyhow::Result<ChunkAnchor> {
2582 Ok(self.storage.connection().query_row(
2583 "
2584 SELECT anchor_version, normalized_hash, start_boundary_hash, end_boundary_hash,
2585 start_context_hash, end_context_hash, context_radius
2586 FROM chunks WHERE id = ?1
2587 ",
2588 [chunk_id],
2589 |row| {
2590 Ok(ChunkAnchor {
2591 version: row.get(0)?,
2592 normalized_hash: row.get(1)?,
2593 start_boundary_hash: row.get(2)?,
2594 end_boundary_hash: row.get(3)?,
2595 start_context_hash: row.get(4)?,
2596 end_context_hash: row.get(5)?,
2597 context_radius: row.get(6)?,
2598 })
2599 },
2600 )?)
2601 }
2602
2603 fn mark_file_deleted(&self, path: &Path) -> anyhow::Result<()> {
2604 let path = path_string(path);
2605 self.remove_file_in_scope(Path::new(&path), "", &self.active_worktree_id)?;
2606 self.storage.connection().execute(
2607 "INSERT INTO main.files(path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms, indexed_revision, commit_sha, worktree_id)
2608 VALUES (?1, 'unknown', 'deleted', '', 0, 0, ?2, '', '', ?3)
2609 ON CONFLICT(path, commit_sha, worktree_id) DO UPDATE SET
2610 kind = 'deleted',
2611 sha256 = '',
2612 modified_at_ms = 0,
2613 indexed_at_ms = excluded.indexed_at_ms",
2614 params![path, now_ms(), self.active_worktree_id],
2615 )?;
2616 self.mark_fts_dirty()?;
2617 Ok(())
2618 }
2619
2620 fn remove_file_in_scope(
2621 &self,
2622 path: &Path,
2623 commit_sha: &str,
2624 worktree_id: &str,
2625 ) -> anyhow::Result<()> {
2626 let path = path_string(path);
2627 self.storage.connection().execute(
2628 "UPDATE edges
2629 SET to_symbol_id = NULL,
2630 confidence = 'NameOnly'
2631 WHERE to_symbol_id IN (
2632 SELECT symbols.id FROM symbols
2633 JOIN main.files ON main.files.id = symbols.file_id
2634 WHERE main.files.path = ?1
2635 AND main.files.commit_sha = ?2
2636 AND main.files.worktree_id = ?3
2637 )",
2638 params![path, commit_sha, worktree_id],
2639 )?;
2640 self.storage.connection().execute(
2641 "DELETE FROM edges
2642 WHERE source_file_id IN (
2643 SELECT id FROM main.files
2644 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2645 )
2646 OR from_symbol_id IN (
2647 SELECT symbols.id FROM symbols
2648 JOIN main.files ON main.files.id = symbols.file_id
2649 WHERE main.files.path = ?1
2650 AND main.files.commit_sha = ?2
2651 AND main.files.worktree_id = ?3
2652 )",
2653 params![path, commit_sha, worktree_id],
2654 )?;
2655 self.storage
2656 .connection()
2657 .execute("DELETE FROM parser_failures WHERE path = ?1", [&path])?;
2658 self.storage.connection().execute(
2659 "DELETE FROM chunk_fts
2660 WHERE rowid IN (
2661 SELECT chunks.id FROM chunks
2662 JOIN main.files ON main.files.id = chunks.file_id
2663 WHERE main.files.path = ?1
2664 AND main.files.commit_sha = ?2
2665 AND main.files.worktree_id = ?3
2666 )",
2667 params![path, commit_sha, worktree_id],
2668 )?;
2669 self.storage.connection().execute(
2670 "DELETE FROM chunks
2671 WHERE file_id IN (
2672 SELECT id FROM main.files
2673 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2674 )",
2675 params![path, commit_sha, worktree_id],
2676 )?;
2677 self.storage.connection().execute(
2678 "DELETE FROM symbols
2679 WHERE file_id IN (
2680 SELECT id FROM main.files
2681 WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3
2682 )",
2683 params![path, commit_sha, worktree_id],
2684 )?;
2685 self.storage.connection().execute(
2686 "DELETE FROM main.files WHERE path = ?1 AND commit_sha = ?2 AND worktree_id = ?3",
2687 params![path, commit_sha, worktree_id],
2688 )?;
2689 self.mark_fts_dirty()?;
2690 Ok(())
2691 }
2692
2693 fn ensure_fts_fresh(&self) -> anyhow::Result<()> {
2694 let content_revision = self.content_revision()?;
2695 let fts_source_revision = self.meta("fts_source_revision")?;
2696 if !self.fts_dirty()? && fts_source_revision.as_deref() == Some(content_revision.as_str()) {
2697 return Ok(());
2698 }
2699 self.rebuild_fts()?;
2700 let refreshed_revision = self.meta("fts_source_revision")?;
2701 if refreshed_revision.as_deref() != Some(content_revision.as_str()) {
2702 anyhow::bail!(
2703 "FTS freshness invariant failed: content_revision={content_revision}, fts_source_revision={}",
2704 refreshed_revision.unwrap_or_else(|| "<missing>".to_string())
2705 );
2706 }
2707 Ok(())
2708 }
2709
2710 fn fts_dirty(&self) -> anyhow::Result<bool> {
2711 Ok(self.meta("fts_dirty")?.as_deref() == Some("true"))
2712 }
2713
2714 fn file_row(&self, path: &Path) -> anyhow::Result<FileRow> {
2715 self.storage
2716 .connection()
2717 .query_row(
2718 "SELECT language, kind FROM files WHERE path = ?1",
2719 [path_string(path)],
2720 |row| {
2721 let language: String = row.get(0)?;
2722 let kind: String = row.get(1)?;
2723 Ok((language, kind))
2724 },
2725 )
2726 .map_err(Into::into)
2727 .and_then(|(language, kind)| {
2728 Ok(FileRow { language: language.parse()?, kind: kind.parse()? })
2729 })
2730 }
2731
2732 fn graph_reindex_files(&self) -> anyhow::Result<Vec<GraphReindexFile>> {
2733 let mut stmt = self
2734 .storage
2735 .connection()
2736 .prepare("SELECT id, path, language, kind FROM files ORDER BY path")?;
2737 let rows = stmt.query_map([], |row| {
2738 let language: String = row.get(2)?;
2739 let kind: String = row.get(3)?;
2740 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?, language, kind))
2741 })?;
2742 let mut files = Vec::new();
2743 for row in rows {
2744 let (id, path, language, kind) = row?;
2745 files.push(GraphReindexFile {
2746 id,
2747 path,
2748 language: language.parse()?,
2749 kind: kind.parse()?,
2750 });
2751 }
2752 Ok(files)
2753 }
2754
2755 fn indexed_files(&self) -> anyhow::Result<Vec<IndexedFile>> {
2756 let mut stmt =
2757 self.storage.connection().prepare("SELECT path, sha256 FROM files ORDER BY path")?;
2758 let rows =
2759 stmt.query_map([], |row| Ok(IndexedFile { path: row.get(0)?, sha256: row.get(1)? }))?;
2760 let mut files = Vec::new();
2761 for row in rows {
2762 files.push(row?);
2763 }
2764 Ok(files)
2765 }
2766
2767 fn indexed_file_count(&self) -> anyhow::Result<usize> {
2768 let count =
2769 self.storage
2770 .connection()
2771 .query_row("SELECT COUNT(*) FROM files", [], |row| row.get::<_, i64>(0))?;
2772 Ok(usize::try_from(count).unwrap_or(usize::MAX))
2773 }
2774
2775 fn content_revision(&self) -> anyhow::Result<String> {
2776 let value = self.storage.connection().query_row(
2777 "SELECT COALESCE(string_agg(path || ':' || sha256, ',' ORDER BY path), '') FROM files",
2778 [],
2779 |row| row.get::<_, String>(0),
2780 )?;
2781 Ok(hex_sha256(value.as_bytes()))
2782 }
2783}
2784
/// Typed view of one `files` row as returned by `IndexDatabase::file_row`.
#[derive(Debug)]
struct FileRow {
    /// Language parsed from the row's `language` column.
    language: Language,
    /// Target kind parsed from the row's `kind` column.
    kind: TargetKind,
}
2790
/// One indexed file as listed by `graph_reindex_files` for rebuilding graph edges.
#[derive(Debug)]
struct GraphReindexFile {
    /// Row id in the `files` table.
    id: i64,
    /// Stored path; it is joined onto the source root to read the file.
    path: String,
    /// Language parsed from the stored `language` column.
    language: Language,
    /// Target kind parsed from the stored `kind` column.
    kind: TargetKind,
}
2798
/// Raw `files` columns that `graph_coverage` needs for a single path.
#[derive(Debug)]
struct GraphPathRow {
    /// Stored language string, reported as-is.
    language: String,
    /// Stored content hash, compared against the on-disk file to detect staleness.
    sha256: String,
    /// Stored indexed revision; an empty string is reported as "no revision".
    indexed_revision: String,
}
2805
2806fn rank_docs_for_symbol(symbol: &crate::query::symbol::SymbolHit, hits: &mut [SearchHit]) {
2807 let source_module = module_stem(&symbol.path);
2808 let symbol_name = symbol.name.to_ascii_lowercase();
2809 let qualified_name = symbol.qualified_name.to_ascii_lowercase();
2810 hits.sort_by(|a, b| {
2811 let a_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, a);
2812 let b_rank = docs_locality_rank(symbol, &source_module, &symbol_name, &qualified_name, b);
2813 a_rank
2814 .cmp(&b_rank)
2815 .then_with(|| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal))
2816 .then_with(|| a.path.cmp(&b.path))
2817 .then_with(|| a.start_line.cmp(&b.start_line))
2818 });
2819 for (idx, hit) in hits.iter_mut().enumerate() {
2820 hit.score = (10_000usize.saturating_sub(idx)) as f64;
2821 }
2822}
2823
2824fn docs_locality_rank(
2825 symbol: &crate::query::symbol::SymbolHit,
2826 source_module: &str,
2827 symbol_name: &str,
2828 qualified_name: &str,
2829 hit: &SearchHit,
2830) -> u8 {
2831 let path = hit.path.to_ascii_lowercase();
2832 let summary = hit.summary.to_ascii_lowercase();
2833 let hit_symbol = hit.symbol_path.as_deref().unwrap_or_default().to_ascii_lowercase();
2834 if hit.path == symbol.path && hit_symbol == symbol.qualified_name.to_ascii_lowercase() {
2835 return 0;
2836 }
2837 if hit.path == symbol.path {
2838 return 1;
2839 }
2840 if !source_module.is_empty()
2841 && path.contains(source_module)
2842 && (summary.contains(symbol_name) || hit_symbol.contains(symbol_name))
2843 {
2844 return 2;
2845 }
2846 if summary.contains(qualified_name) || hit_symbol.contains(qualified_name) {
2847 return 3;
2848 }
2849 if summary.contains(symbol_name) || hit_symbol.contains(symbol_name) {
2850 return 4;
2851 }
2852 if !source_module.is_empty() && path.contains(source_module) {
2853 return 5;
2854 }
2855 9
2856}
2857
/// Returns the lower-cased file stem of `path`, or an empty string when the path has no
/// stem or the stem is not valid UTF-8.
fn module_stem(path: &str) -> String {
    match Path::new(path).file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_ascii_lowercase(),
        None => String::new(),
    }
}
2865
2866fn dedupe_search_hits(hits: &mut Vec<SearchHit>) {
2867 let mut seen = BTreeSet::new();
2868 hits.retain(|hit| seen.insert(hit.chunk_id));
2869}
2870
/// Collapses every run of whitespace in `text` to a single space, trims both ends, and
/// keeps at most the first 240 characters.
fn bounded_summary(text: &str) -> String {
    let mut collapsed = String::new();
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.chars().take(240).collect()
}
2874
/// Identity under which physical symbols are grouped into one logical symbol.
///
/// Used as the `BTreeMap` key in `rebuild_logical_symbols`, so the derived `Ord` (field
/// declaration order) determines the order in which logical symbols are inserted.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct LogicalSymbolKey {
    language: String,
    path: String,
    name: String,
    qualified_name: String,
    kind: String,
}
2883
2884impl LogicalSymbolKey {
2885 fn from(row: &LogicalSymbolMemberRow) -> Self {
2886 Self {
2887 language: row.language.clone(),
2888 path: row.path.clone(),
2889 name: row.name.clone(),
2890 qualified_name: row.qualified_name.clone(),
2891 kind: row.kind.clone(),
2892 }
2893 }
2894}
2895
/// One physical symbol loaded by `rebuild_logical_symbols`, ready to become a member of a
/// logical symbol.
#[derive(Debug, Clone)]
struct LogicalSymbolMemberRow {
    /// Row id of the symbol in the `symbols` table.
    symbol_id: i64,
    /// Path of the file that contains the symbol.
    path: String,
    language: String,
    name: String,
    qualified_name: String,
    kind: String,
    /// Optional signature text; its SHA-256 is stored as the member's `signature_hash`.
    signature: Option<String>,
    /// Line of the symbol's start byte, as computed by `symbol_line_for_byte`.
    start_line: i64,
    /// Line of the symbol's end byte, as computed by `symbol_line_for_byte`.
    end_line: i64,
}
2908
/// Maps a file-level byte offset to a line number using the text of the enclosing chunk.
///
/// * `text` - the chunk's text.
/// * `chunk_start_byte` - file offset at which `text` begins.
/// * `chunk_start_line` - line number of the chunk's first line.
/// * `byte` - file offset to translate.
///
/// Offsets at or before the chunk start map to `chunk_start_line`, clamped to at least 1.
/// Offsets past the end of the chunk are clamped to the chunk's length. Otherwise the
/// result is `chunk_start_line` plus the number of newlines before the offset.
///
/// Newlines are counted on raw bytes, so an offset that falls inside a multi-byte UTF-8
/// character is handled instead of panicking on a `str` slice boundary.
fn symbol_line_for_byte(
    text: &str,
    chunk_start_byte: usize,
    chunk_start_line: i64,
    byte: usize,
) -> i64 {
    if byte <= chunk_start_byte {
        return chunk_start_line.max(1);
    }
    // `byte > chunk_start_byte` here, so the subtraction cannot underflow.
    let local = (byte - chunk_start_byte).min(text.len());
    let newlines = text.as_bytes()[..local].iter().filter(|&&b| b == b'\n').count();
    chunk_start_line.saturating_add(i64::try_from(newlines).unwrap_or(0))
}
2922
2923fn graph_only_reason(edge: &crate::query::graph::GraphHop, current_line: Option<&str>) -> String {
2924 let Some(line) = current_line else {
2925 return "missing_current_source_line".to_string();
2926 };
2927 if edge
2928 .target_qualified_name
2929 .as_deref()
2930 .is_some_and(|qualified| !qualified.is_empty() && line.contains(qualified))
2931 {
2932 return "qualified_call_pattern_mismatch".to_string();
2933 }
2934 if edge.target.as_deref().is_some_and(|target| !target.is_empty() && line.contains(target)) {
2935 return "imported_or_unqualified_call".to_string();
2936 }
2937 if edge
2938 .evidence
2939 .as_deref()
2940 .is_some_and(|evidence| !evidence.is_empty() && line.contains(evidence.trim()))
2941 {
2942 return "regex_too_narrow".to_string();
2943 }
2944 "stale_or_overbroad_graph_edge".to_string()
2945}
2946
2947fn is_likely_false_positive_graph_only(
2948 edge: &crate::query::graph::GraphHop,
2949 graph_only: &crate::query::graph::GraphOnlyEdge,
2950) -> bool {
2951 if graph_only.likely_reason == "stale_or_overbroad_graph_edge" {
2952 return true;
2953 }
2954 edge.resolution == "target_name_fallback"
2955 || edge.confidence == "NameOnly"
2956 || edge.confidence == "Ambiguous"
2957 || !edge.verified_target_symbol
2958}
2959
2960fn classify_text_only_hit(
2961 path: &str,
2962 text: &str,
2963 parser_failure_paths: &BTreeSet<String>,
2964) -> &'static str {
2965 if parser_failure_paths.contains(path) {
2966 return "parser_failure";
2967 }
2968 if is_generated_path(path) {
2969 return "generated_text_mention";
2970 }
2971 let trimmed = text.trim_start();
2972 if is_comment_like_text(trimmed) {
2973 return "comment_text_mention";
2974 }
2975 if is_import_or_declaration_text(trimmed) {
2976 return "declaration_text_mention";
2977 }
2978 if is_test_like_path(path) && is_test_scaffolding_text(trimmed) {
2979 return "test_scaffolding_text_mention";
2980 }
2981 "parser_call_extraction"
2982}
2983
/// Tells whether a text-only hit classification points at a real parser gap, i.e. is
/// `parser_call_extraction` or `parser_failure`.
fn is_likely_parser_gap_kind(kind: &str) -> bool {
    const PARSER_GAP_KINDS: [&str; 2] = ["parser_call_extraction", "parser_failure"];
    PARSER_GAP_KINDS.contains(&kind)
}
2987
/// Tells whether `path` looks like generated output.
///
/// A path qualifies when it ends in `.d.ts` (which also covers `_bg.wasm.d.ts`) or when
/// any of its directory components is `generated` or `generated-web`. Directory
/// components are matched by `/`-separated segment, so a root-level `generated/...` in a
/// repository-relative path is recognised as well as a nested one. The final segment (the
/// file name) is never treated as a directory.
fn is_generated_path(path: &str) -> bool {
    const GENERATED_DIRS: [&str; 2] = ["generated", "generated-web"];
    if path.ends_with(".d.ts") {
        return true;
    }
    let mut segments = path.split('/');
    // Drop the last segment: only directories count.
    segments.next_back();
    segments.any(|segment| GENERATED_DIRS.contains(&segment))
}
2994
/// Tells whether `text` starts with a comment marker: `//`, `/*`, `*`, `*/` or `#`.
///
/// The check is purely textual and expects leading whitespace to be stripped already.
fn is_comment_like_text(text: &str) -> bool {
    const COMMENT_PREFIXES: [&str; 5] = ["//", "/*", "*", "*/", "#"];
    COMMENT_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
3002
/// Tells whether `text` starts with an import or type-declaration keyword
/// (`import`, `export type`, `export interface`, `type`, `interface`, `declare`), each
/// followed by a space.
fn is_import_or_declaration_text(text: &str) -> bool {
    const DECLARATION_PREFIXES: [&str; 6] =
        ["import ", "export type ", "export interface ", "type ", "interface ", "declare "];
    DECLARATION_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
3011
/// Tells whether `text` contains any common test-framework fragment (`.mock`, `jest.`,
/// `jest<`, `expect(`, `toHaveBeen`, `describe(`, `it(`, `test(`).
///
/// These are plain substring checks, so e.g. `emit(` also matches through `it(`.
fn is_test_scaffolding_text(text: &str) -> bool {
    const SCAFFOLDING_FRAGMENTS: [&str; 8] =
        [".mock", "jest.", "jest<", "expect(", "toHaveBeen", "describe(", "it(", "test("];
    SCAFFOLDING_FRAGMENTS.iter().any(|fragment| text.contains(*fragment))
}
3022
3023fn recommended_graph_text_fallback(
3024 parser_gaps: &[crate::query::graph::TextOnlyHit],
3025 graph_only_edges: &[crate::query::graph::GraphOnlyEdge],
3026) -> String {
3027 match (parser_gaps.is_empty(), graph_only_edges.is_empty()) {
3028 (false, false) => "both",
3029 (false, true) => "text",
3030 (true, false) => "graph",
3031 (true, true) => "none",
3032 }
3033 .to_string()
3034}
3035
/// Describes how a comparison `pattern` relates to `symbol_name`.
///
/// * `"identifier_or_call"` - the pattern contains `\b`, `\W` or `[^`, or the symbol name
///   directly followed by `(` (escaped or not).
/// * `"substring_identifier"` - the pattern merely contains the symbol name.
/// * `"regex"` - anything else, including an empty `symbol_name`.
fn compare_pattern_match_mode(pattern: &str, symbol_name: &str) -> String {
    let mode = if symbol_name.is_empty() {
        "regex"
    } else {
        let call_forms = [format!("{symbol_name}\\("), format!("{symbol_name}(")];
        let has_boundary_token =
            ["\\b", "\\W", "[^"].iter().any(|token| pattern.contains(*token));
        let has_call_form = call_forms.iter().any(|form| pattern.contains(form.as_str()));
        if has_boundary_token || has_call_form {
            "identifier_or_call"
        } else if pattern.contains(symbol_name) {
            "substring_identifier"
        } else {
            "regex"
        }
    };
    mode.to_string()
}
3055
/// Tells whether `path` looks like test code, ignoring ASCII case.
///
/// A path qualifies when its file name ends in a known test suffix (`_test.rs`,
/// `.test.ts`, `.test.tsx`, `.spec.ts`, `.spec.tsx`) or when any of its directory
/// components is `test`, `tests` or `__tests__`. Directory components are matched by
/// `/`-separated segment, so a root-level `tests/...` in a repository-relative path is
/// recognised as well as a nested one. The final segment (the file name) is never treated
/// as a directory.
fn is_test_like_path(path: &str) -> bool {
    const TEST_DIRS: [&str; 3] = ["test", "tests", "__tests__"];
    const TEST_SUFFIXES: [&str; 5] =
        ["_test.rs", ".test.ts", ".test.tsx", ".spec.ts", ".spec.tsx"];
    let lower = path.to_ascii_lowercase();
    if TEST_SUFFIXES.iter().any(|suffix| lower.ends_with(*suffix)) {
        return true;
    }
    let mut segments = lower.split('/');
    // Drop the last segment: only directories count.
    segments.next_back();
    segments.any(|segment| TEST_DIRS.contains(&segment))
}
3067
/// Pairs a file path with a SHA-256 digest string.
///
/// NOTE(review): no constructor or reader of this type is visible in this part of the
/// file; presumably it mirrors a `path`/`sha256` row of the `files` table — confirm
/// against its users.
#[derive(Debug)]
struct IndexedFile {
    path: String,
    sha256: String,
}
3073
/// A file selected for indexing, together with the target settings that selected it.
#[derive(Debug, Clone)]
struct IndexFile {
    /// Path used to read the file from disk.
    full_path: PathBuf,
    /// `full_path` expressed relative to the configured root.
    relative_path: PathBuf,
    /// Language of the target that matched the file.
    language: Language,
    /// Kind of the target that matched the file.
    kind: TargetKind,
    /// Left empty by the collectors in this file.
    /// NOTE(review): presumably filled in later with the active commit — confirm.
    commit_sha: String,
    /// Left empty by the collectors in this file.
    /// NOTE(review): presumably filled in later with the active worktree — confirm.
    worktree_id: String,
}
3083
/// Identifies a scope by a commit SHA or a worktree id.
///
/// Both constructors fill exactly one field and leave the other as an empty string.
#[derive(Debug, Clone)]
struct FileScope {
    commit_sha: String,
    worktree_id: String,
}

impl FileScope {
    /// Builds a scope that carries only `commit_sha`; `worktree_id` is empty.
    fn commit(commit_sha: String) -> Self {
        let worktree_id = String::new();
        Self { commit_sha, worktree_id }
    }

    /// Builds a scope that carries only `worktree_id`; `commit_sha` is empty.
    fn worktree(worktree_id: String) -> Self {
        let commit_sha = String::new();
        Self { commit_sha, worktree_id }
    }
}
3099
/// Outcome of preparing one [`IndexFile`].
#[derive(Debug)]
struct PreparedIndexFile {
    /// The file that was prepared (a clone of the input descriptor).
    file: IndexFile,
    /// The prepared content, or the error that stopped preparation of this file.
    prepared: anyhow::Result<PreparedIndexContent>,
}
3105
/// Everything derived from a file's content before it is written to the index.
#[derive(Debug)]
struct PreparedIndexContent {
    /// File modification time in milliseconds since the Unix epoch.
    modified_at_ms: i64,
    /// File content, read as UTF-8.
    text: String,
    /// Lowercase hex SHA-256 of `text`'s bytes.
    sha256: String,
    /// Chunks produced by the chunker for this file.
    chunks: Vec<Chunk>,
    /// Extracted symbols; empty for generated targets and for text larger than
    /// `chunker::MAX_STRUCTURAL_PARSE_BYTES`.
    symbols: Vec<Symbol>,
    /// Parser error message, if any; always `None` for Markdown, generated targets and
    /// text larger than `chunker::MAX_STRUCTURAL_PARSE_BYTES`.
    parser_failure: Option<String>,
}
3115
/// Difference between the files found on disk and the files recorded in the index.
#[derive(Debug)]
struct DiscoveryPlan {
    /// Discovered files that need (re)indexing: those without an index row plus those
    /// whose content hash differs from the recorded one.
    files: Vec<IndexFile>,
    /// Recorded paths that were not discovered on disk.
    deleted: BTreeSet<PathBuf>,
    /// Discovered files that have no index row yet.
    unindexed: Vec<IndexFile>,
    /// Relative paths of discovered files whose content hash differs from the index.
    changed: Vec<PathBuf>,
    /// Number of files found by discovery.
    discovered_files: usize,
    /// Discovered paths plus deleted paths minus unindexed files (saturating).
    indexed_files: usize,
}
3125
/// Paths reported by git status, split by whether they still exist on disk.
#[derive(Debug, Default)]
struct GitChangedPaths {
    /// Reported paths that exist under the configured root.
    changed: BTreeSet<PathBuf>,
    /// Reported paths that no longer exist under the configured root.
    deleted: BTreeSet<PathBuf>,
}
3131
3132fn collect_index_files(config: &Config) -> anyhow::Result<Vec<IndexFile>> {
3133 let mut targets = config.targets.iter().collect::<Vec<_>>();
3134 targets.sort_by_key(|target| match target.kind {
3135 TargetKind::Generated => 0,
3136 TargetKind::Tests => 1,
3137 TargetKind::Docs => 2,
3138 TargetKind::Source => 3,
3139 });
3140 let mut seen = BTreeSet::new();
3141 let mut files = Vec::new();
3142
3143 for target in targets {
3144 for file in walker::walk_target(&config.root, target)? {
3145 let relative_path = file.strip_prefix(&config.root)?.to_path_buf();
3146 if !seen.insert(relative_path.clone()) {
3147 continue;
3148 }
3149 files.push(IndexFile {
3150 full_path: file,
3151 relative_path,
3152 language: target.language,
3153 kind: target.kind,
3154 commit_sha: String::new(),
3155 worktree_id: String::new(),
3156 });
3157 }
3158 }
3159
3160 Ok(files)
3161}
3162
3163fn collect_changed_index_files(
3164 config: &Config,
3165 changes: &GitChangedPaths,
3166) -> anyhow::Result<Vec<IndexFile>> {
3167 let mut files = Vec::new();
3168 for relative_path in &changes.changed {
3169 let full_path = config.root.join(relative_path);
3170 if !full_path.is_file() {
3171 continue;
3172 }
3173 let Some((language, kind)) = target_for_path(config, relative_path) else {
3174 continue;
3175 };
3176 files.push(IndexFile {
3177 full_path,
3178 relative_path: relative_path.clone(),
3179 language,
3180 kind,
3181 commit_sha: String::new(),
3182 worktree_id: String::new(),
3183 });
3184 }
3185 Ok(files)
3186}
3187
3188fn spawn_git_history_prepare(
3189 root: &Path,
3190) -> JoinHandle<anyhow::Result<git_history::PreparedGitHistory>> {
3191 let root = root.to_path_buf();
3192 thread::spawn(move || git_history::prepare(&root))
3193}
3194
3195fn join_git_history_prepare(
3196 handle: JoinHandle<anyhow::Result<git_history::PreparedGitHistory>>,
3197) -> anyhow::Result<git_history::PreparedGitHistory> {
3198 handle.join().map_err(|_| anyhow::anyhow!("git history preparation panicked"))?
3199}
3200
3201fn prepare_index_file(file: &IndexFile) -> PreparedIndexFile {
3202 PreparedIndexFile { file: file.clone(), prepared: prepare_index_content(file) }
3203}
3204
/// Prepares every file in `files` in parallel and reports throttled progress.
///
/// The parallel work runs on a scoped thread while the calling thread drains a channel
/// and invokes `progress` with [`IndexProgress::PreparingFile`] events, so `progress`
/// is only ever called on the calling thread.
///
/// The `current` value in each event counts completed files in completion order; it is
/// not the position of the file in `files`.
///
/// # Errors
/// Fails with "parallel file preparation panicked" when the worker thread panicked.
/// Per-file preparation errors are stored in each [`PreparedIndexFile`] instead.
fn prepare_files_with_progress<F>(
    files: &[IndexFile],
    progress: &mut F,
) -> anyhow::Result<Vec<PreparedIndexFile>>
where
    F: FnMut(IndexProgress),
{
    // Message sent from the workers to the calling thread for each reported file.
    #[derive(Debug)]
    struct PreparedProgress {
        current: usize,
        total: usize,
        path: PathBuf,
        language: Language,
        kind: TargetKind,
    }

    let total = files.len();
    let prepared = thread::scope(|scope| {
        let (tx, rx) = mpsc::channel();
        let completed = AtomicUsize::new(0);
        // `move` hands the sender and the counter to the worker thread. The sender is
        // dropped when this closure returns, which is what ends the receive loop below.
        let handle = scope.spawn(move || {
            files
                .par_iter()
                .map(|file| {
                    let prepared = prepare_index_file(file);
                    // 1-based count of files finished so far across all workers.
                    let current = completed.fetch_add(1, Ordering::Relaxed) + 1;
                    if should_report_file_progress(current, total) {
                        // A failed send is ignored on purpose: progress is best-effort.
                        let _ = tx.send(PreparedProgress {
                            current,
                            total,
                            path: file.relative_path.clone(),
                            language: file.language,
                            kind: file.kind,
                        });
                    }
                    prepared
                })
                .collect::<Vec<_>>()
        });

        // Blocks until the worker closure has finished and dropped the sender.
        for event in rx {
            progress(IndexProgress::PreparingFile {
                current: event.current,
                total: event.total,
                path: event.path,
                language: event.language,
                kind: event.kind,
            });
        }

        handle.join().map_err(|_| anyhow::anyhow!("parallel file preparation panicked"))
    })?;
    Ok(prepared)
}
3259
/// Decides whether finishing file number `current` (1-based) out of `total` is worth a
/// progress event.
///
/// Reports the first file, the last file, and every file whose completion moves the
/// integer value `current * 10 / total` to a new value. Never reports when `total` is 0.
fn should_report_file_progress(current: usize, total: usize) -> bool {
    if total == 0 {
        return false;
    }
    if current == 1 || current == total {
        return true;
    }
    let decile_after = |completed: usize| completed.saturating_mul(10) / total;
    decile_after(current) != decile_after(current.saturating_sub(1))
}
3269
3270fn prepare_index_content(file: &IndexFile) -> anyhow::Result<PreparedIndexContent> {
3271 let text = fs::read_to_string(&file.full_path)?;
3272 let modified_at_ms = file_metadata_ms(&file.full_path)?;
3273 let sha256 = hex_sha256(text.as_bytes());
3274 let parser_failure =
3275 if file.language != Language::Markdown && file.kind != TargetKind::Generated {
3276 if text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3277 None
3278 } else {
3279 parser::parse_error(&file.relative_path, file.language, &text)
3280 .unwrap_or_else(|err| Some(err.to_string()))
3281 }
3282 } else {
3283 None
3284 };
3285 let chunks = if file.kind == TargetKind::Generated {
3286 chunker::generated_chunks_for_file(&file.relative_path, &text)
3287 } else {
3288 chunker::chunks_for_file(&file.relative_path, file.language, &text)
3289 };
3290 let symbols =
3291 if file.kind == TargetKind::Generated || text.len() > chunker::MAX_STRUCTURAL_PARSE_BYTES {
3292 Vec::new()
3293 } else {
3294 symbols::symbols_for_file(&file.relative_path, file.language, &text)
3295 };
3296 Ok(PreparedIndexContent { modified_at_ms, text, sha256, chunks, symbols, parser_failure })
3297}
3298
3299fn discovery_plan(conn: &rusqlite::Connection, config: &Config) -> anyhow::Result<DiscoveryPlan> {
3300 let discovered = collect_index_files(config)?;
3301 let mut indexed = indexed_file_map(conn)?;
3302 let mut current_paths = BTreeSet::new();
3303 let mut files = Vec::new();
3304 let mut unindexed = Vec::new();
3305 let mut changed = Vec::new();
3306 let discovered_files = discovered.len();
3307 let hashed = discovered
3308 .par_iter()
3309 .map(|file| -> anyhow::Result<(IndexFile, String)> {
3310 let text = fs::read(&file.full_path)?;
3311 Ok((file.clone(), hex_sha256(&text)))
3312 })
3313 .collect::<Vec<_>>();
3314
3315 for hashed_file in hashed {
3316 let (file, current_hash) = hashed_file?;
3317 let relative = path_string(&file.relative_path);
3318 current_paths.insert(file.relative_path.clone());
3319 let Some(indexed_hash) = indexed.remove(&relative) else {
3320 unindexed.push(file.clone());
3321 files.push(file);
3322 continue;
3323 };
3324 if current_hash != indexed_hash {
3325 changed.push(file.relative_path.clone());
3326 files.push(file);
3327 }
3328 }
3329
3330 let deleted = indexed
3331 .into_keys()
3332 .map(PathBuf::from)
3333 .filter(|path| !current_paths.contains(path))
3334 .collect::<BTreeSet<_>>();
3335
3336 Ok(DiscoveryPlan {
3337 discovered_files,
3338 indexed_files: current_paths
3339 .len()
3340 .saturating_add(deleted.len())
3341 .saturating_sub(unindexed.len()),
3342 files,
3343 deleted,
3344 unindexed,
3345 changed,
3346 })
3347}
3348
3349fn indexed_file_map(conn: &rusqlite::Connection) -> anyhow::Result<BTreeMap<String, String>> {
3350 let mut stmt = conn.prepare("SELECT path, sha256 FROM files ORDER BY path")?;
3351 let rows =
3352 stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?;
3353 let mut files = BTreeMap::new();
3354 for row in rows {
3355 let (path, sha256) = row?;
3356 files.insert(path, sha256);
3357 }
3358 Ok(files)
3359}
3360
3361fn target_for_path(config: &Config, relative_path: &Path) -> Option<(Language, TargetKind)> {
3362 let relative = path_string(relative_path);
3363 let language = Language::from_path(relative_path)?;
3364 let mut targets = config.targets.iter().collect::<Vec<_>>();
3365 targets.sort_by_key(|target| match target.kind {
3366 TargetKind::Generated => 0,
3367 TargetKind::Tests => 1,
3368 TargetKind::Docs => 2,
3369 TargetKind::Source => 3,
3370 });
3371 targets.into_iter().find_map(|target| {
3372 if target.language != language {
3373 return None;
3374 }
3375 if !target.directories.iter().any(|directory| {
3376 directory.as_os_str().is_empty()
3377 || directory == Path::new(".")
3378 || relative_path.starts_with(directory)
3379 }) {
3380 return None;
3381 }
3382 if target.exclude.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3383 return None;
3384 }
3385 if !target.include.iter().any(|pattern| matches_simple_pattern(&relative, pattern)) {
3386 return None;
3387 }
3388 Some((target.language, target.kind))
3389 })
3390}
3391
3392fn git_changed_paths(root: &Path) -> anyhow::Result<GitChangedPaths> {
3393 let repo = gix::discover(root)?;
3394 let worktree_root = repo
3395 .workdir()
3396 .ok_or_else(|| anyhow::anyhow!("git repository has no worktree"))?
3397 .to_path_buf();
3398 let pathspec = config_root_pathspec(&worktree_root, root);
3399 let mut paths = GitChangedPaths::default();
3400
3401 for item in repo
3402 .status(gix::progress::Discard)?
3403 .untracked_files(UntrackedFiles::Files)
3404 .tree_index_track_renames(tree_index::TrackRenames::Disabled)
3405 .into_iter([pathspec])?
3406 {
3407 let item = item?;
3408 let Some(path) = repo_relative_path_to_config_path(&worktree_root, root, item.location())
3409 else {
3410 continue;
3411 };
3412 if root.join(&path).exists() {
3413 if !paths.deleted.contains(&path) {
3414 paths.changed.insert(path);
3415 }
3416 } else {
3417 paths.changed.remove(&path);
3418 paths.deleted.insert(path);
3419 }
3420 }
3421
3422 Ok(paths)
3423}
3424
3425fn repo_relative_path_to_config_path(
3426 worktree_root: &Path,
3427 config_root: &Path,
3428 repo_relative_path: &gix::bstr::BStr,
3429) -> Option<PathBuf> {
3430 let path = PathBuf::from(repo_relative_path.to_str_lossy().as_ref());
3431 worktree_root.join(path).strip_prefix(config_root).ok().map(Path::to_path_buf)
3432}
3433
3434fn config_root_pathspec(worktree_root: &Path, config_root: &Path) -> BString {
3435 let relative = config_root.strip_prefix(worktree_root).unwrap_or_else(|_| Path::new(""));
3436 let relative = path_string(relative);
3437 if relative.is_empty() || relative == "." {
3438 BString::from("*")
3439 } else {
3440 BString::from(format!("{relative}/**"))
3441 }
3442}
3443
/// Matches `path` against a small subset of glob syntax.
///
/// Supported pattern shapes, checked in this order:
/// - `**/*.ext` — `path` ends with `.ext`;
/// - `dir/**` — `path` is `dir` itself or lies below it. The match is made on a path
///   component boundary, so `src/**` accepts `src/lib.rs` but not `src2/lib.rs`. An
///   empty `dir` (pattern `/**`) accepts every path;
/// - anything else — `path` equals the pattern, or contains the pattern with leading
///   and trailing `*` removed.
///
/// Wildcards in the middle of a pattern are not interpreted.
fn matches_simple_pattern(path: &str, pattern: &str) -> bool {
    if let Some(extension) = pattern.strip_prefix("**/*.") {
        // Equivalent to `path.ends_with(".{extension}")` without building a string.
        return path.strip_suffix(extension).map_or(false, |stem| stem.ends_with('.'));
    }
    if let Some(prefix) = pattern.strip_suffix("/**") {
        return match path.strip_prefix(prefix) {
            // A bare string prefix is not enough: what follows the directory name must
            // be nothing at all or a separator, otherwise sibling names that merely
            // share the prefix would match too.
            Some(rest) => {
                prefix.is_empty()
                    || prefix.ends_with('/')
                    || rest.is_empty()
                    || rest.starts_with('/')
            }
            None => false,
        };
    }
    path == pattern || path.contains(pattern.trim_matches('*'))
}
3453
3454fn meta_for(conn: &rusqlite::Connection, key: &str) -> anyhow::Result<Option<String>> {
3455 Ok(conn
3456 .query_row("SELECT value FROM index_meta WHERE key = ?1", [key], |row| row.get(0))
3457 .optional()?)
3458}
3459
/// Runs `git` with `args` in `root` and returns its trimmed standard output.
///
/// Returns `None` when the process cannot be started or exits unsuccessfully. Output
/// that is not valid UTF-8 is converted lossily.
fn git_output(root: &Path, args: &[&str]) -> Option<String> {
    let mut command = Command::new("git");
    command.args(args).current_dir(root);
    match command.output() {
        Ok(output) if output.status.success() => {
            let stdout = String::from_utf8_lossy(&output.stdout);
            Some(stdout.trim().to_string())
        }
        _ => None,
    }
}
3467
3468fn resolve_git_context(root: &Path) -> (String, String) {
3469 let commit_sha =
3470 git_output(root, &["rev-parse", "HEAD"]).map(|s| s.trim().to_string()).unwrap_or_default();
3471 let worktree_id = root.to_string_lossy().trim_end_matches('/').to_string();
3472 (commit_sha, worktree_id)
3473}
3474
3475fn file_metadata_ms(path: &Path) -> anyhow::Result<i64> {
3476 let modified = fs::metadata(path)?.modified()?;
3477 Ok(duration_ms(modified.duration_since(UNIX_EPOCH)?))
3478}
3479
3480fn now_ms() -> i64 {
3481 duration_ms(SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default())
3482}
3483
/// Converts `duration` to whole milliseconds, saturating at `i64::MAX`.
fn duration_ms(duration: std::time::Duration) -> i64 {
    match i64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => i64::MAX,
    }
}
3487
3488fn hex_sha256(bytes: &[u8]) -> String {
3489 let hash = Sha256::digest(bytes);
3490 let mut out = String::with_capacity(hash.len() * 2);
3491 for byte in hash {
3492 use std::fmt::Write as _;
3493 let _ = write!(out, "{byte:02x}");
3494 }
3495 out
3496}
3497
/// Renders `path` as text with every backslash replaced by a forward slash.
///
/// Invalid Unicode in the path is converted lossily.
fn path_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy.chars().map(|ch| if ch == '\\' { '/' } else { ch }).collect()
}
3501
3502#[cfg(test)]
3503mod schema_bootstrap_tests {
3504 use std::sync::atomic::{AtomicU64, Ordering};
3505
3506 use super::*;
3507 use crate::config::ResolvedTarget;
3508
3509 static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
3510
    /// Rebuilding against a root whose only target directory is empty must still create
    /// the database file with the expected tables, columns and schema version.
    ///
    /// NOTE(review): `table_count`, `table_columns`, `file_columns`, `chunk_columns` and
    /// `indexed_revision_count` are defined outside this view; the assertions read as
    /// "schema object exists" / "column exists" checks — confirm against the helpers.
    #[test]
    fn rebuild_bootstraps_sqlite_schema_for_empty_target_root() {
        let root = unique_temp_root();
        // Best-effort cleanup of leftovers from an earlier run.
        let _ = fs::remove_dir_all(&root);
        let docs = root.join("docs");
        fs::create_dir_all(&docs).unwrap();

        // A single Markdown docs target pointing at the empty `docs` directory.
        let config = Config {
            root: root.clone(),
            database: root.join(".rag-rat/index.sqlite"),
            targets: vec![ResolvedTarget {
                name: "markdown".to_string(),
                language: Language::Markdown,
                directories: vec![PathBuf::from("docs")],
                include: vec!["**/*.md".to_string()],
                exclude: Vec::new(),
                kind: TargetKind::Docs,
            }],
            local_ai: Default::default(),
        };

        let db = IndexDatabase::rebuild(&config).unwrap();
        assert!(config.database.exists());
        // Core index, git history and local AI tables.
        assert_eq!(table_count(&db, "files"), 1);
        assert_eq!(table_count(&db, "chunks"), 1);
        assert_eq!(table_count(&db, "symbols"), 1);
        assert_eq!(table_count(&db, "parser_failures"), 1);
        assert_eq!(table_count(&db, "index_meta"), 1);
        assert_eq!(table_count(&db, "chunk_fts"), 1);
        assert_eq!(table_count(&db, "git_commits"), 1);
        assert_eq!(table_count(&db, "git_file_changes"), 1);
        assert_eq!(table_count(&db, "git_chunk_blame"), 1);
        assert_eq!(table_count(&db, "commit_fts"), 1);
        assert_eq!(table_count(&db, "ai_models"), 1);
        assert_eq!(table_count(&db, "chunk_embeddings"), 1);
        assert_eq!(table_count(&db, "chunk_summaries"), 1);
        assert_eq!(table_count(&db, "reconcile_meta"), 1);
        assert_eq!(table_count(&db, "reconcile_attempts"), 1);
        // Columns on `files` and `chunks`.
        assert!(file_columns(&db).contains(&"indexed_revision".to_string()));
        assert_eq!(indexed_revision_count(&db), 0);
        assert!(chunk_columns(&db).contains(&"anchor_version".to_string()));
        assert!(chunk_columns(&db).contains(&"normalized_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"start_boundary_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"end_boundary_hash".to_string()));
        assert!(chunk_columns(&db).contains(&"source_revision".to_string()));
        // Columns on `chunk_embeddings`.
        let embedding_columns = table_columns(&db, "chunk_embeddings");
        assert!(embedding_columns.contains(&"model_version".to_string()));
        assert!(embedding_columns.contains(&"input_hash".to_string()));
        assert!(embedding_columns.contains(&"embedding_text_version".to_string()));
        assert!(embedding_columns.contains(&"embedding_policy".to_string()));
        assert!(embedding_columns.contains(&"embedding_priority".to_string()));
        assert!(embedding_columns.contains(&"input_chars".to_string()));
        assert!(embedding_columns.contains(&"input_truncated".to_string()));
        assert!(embedding_columns.contains(&"attempt_count".to_string()));
        assert!(embedding_columns.contains(&"next_retry_after_ms".to_string()));
        assert!(embedding_columns.contains(&"computed_at_ms".to_string()));
        // Columns on `edges`.
        let edge_columns = table_columns(&db, "edges");
        assert!(edge_columns.contains(&"source_start_line".to_string()));
        assert!(edge_columns.contains(&"source_end_line".to_string()));
        assert!(edge_columns.contains(&"source_start_byte".to_string()));
        assert!(edge_columns.contains(&"source_end_byte".to_string()));
        assert!(edge_columns.contains(&"target_start_line".to_string()));
        assert!(edge_columns.contains(&"target_end_line".to_string()));
        assert!(edge_columns.contains(&"target_qualified_name".to_string()));
        assert!(edge_columns.contains(&"evidence".to_string()));
        assert!(edge_columns.contains(&"receiver_hint".to_string()));
        assert!(edge_columns.contains(&"resolution".to_string()));
        // Columns on the logical symbol, GitHub sync and symbol fact tables.
        let logical_columns = table_columns(&db, "logical_symbols");
        assert!(logical_columns.contains(&"qualified_name".to_string()));
        assert!(logical_columns.contains(&"variant_count".to_string()));
        let member_columns = table_columns(&db, "logical_symbol_members");
        assert!(member_columns.contains(&"symbol_id".to_string()));
        assert!(member_columns.contains(&"signature_hash".to_string()));
        let github_ref_sync_columns = table_columns(&db, "github_ref_sync");
        assert!(github_ref_sync_columns.contains(&"status".to_string()));
        assert!(github_ref_sync_columns.contains(&"last_error".to_string()));
        let symbol_fact_columns = table_columns(&db, "symbol_facts");
        assert!(symbol_fact_columns.contains(&"fact_kind".to_string()));
        assert!(symbol_fact_columns.contains(&"fact_value".to_string()));
        // The recorded schema version is the latest one known to this build.
        assert_eq!(
            db.status(&config.database).unwrap().schema.current_version,
            schema::LATEST_SCHEMA_VERSION
        );

        fs::remove_dir_all(root).unwrap();
    }
3597
    /// A rebuild over a single Rust source file must emit at least one `PreparingFile`
    /// and one `IndexingFile` progress event.
    #[test]
    fn rebuild_reports_file_preparation_progress() {
        let root = unique_temp_root();
        // Best-effort cleanup of leftovers from an earlier run.
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn exported() {}\n").unwrap();

        let config = source_config(root.clone(), Language::Rust);
        // Record every progress event the rebuild reports.
        let mut events = Vec::new();
        IndexDatabase::rebuild_with_progress(&config, |progress| events.push(progress)).unwrap();

        assert!(
            events.iter().any(|event| matches!(event, IndexProgress::PreparingFile { .. })),
            "missing preparing progress event: {events:?}"
        );
        assert!(
            events.iter().any(|event| matches!(event, IndexProgress::IndexingFile { .. })),
            "missing indexing progress event: {events:?}"
        );

        fs::remove_dir_all(root).unwrap();
    }
3620
3621 #[test]
3622 fn file_progress_reports_first_final_and_decile_boundaries() {
3623 let reported = (1..=100)
3624 .filter(|current| should_report_file_progress(*current, 100))
3625 .collect::<Vec<_>>();
3626 assert_eq!(reported, vec![1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]);
3627 }
3628
    /// A database whose `schema_version` table was dropped must be reported as older,
    /// refuse to open with a hint to run `rag-rat migrate`, and open again after a
    /// migration.
    #[test]
    fn compatible_open_requires_recorded_schema_version() {
        let root = unique_temp_root();
        // Best-effort cleanup of leftovers from an earlier run.
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join(".rag-rat")).unwrap();
        let database = root.join(".rag-rat/index.sqlite");
        // Create a migrated database, then remove its version bookkeeping.
        IndexDatabase::migrate(&database).unwrap();
        let conn = rusqlite::Connection::open(&database).unwrap();
        conn.execute_batch("DROP TABLE schema_version;").unwrap();
        drop(conn);

        let status = IndexDatabase::migration_check(&database).unwrap();
        assert_eq!(status.state, schema::SchemaState::Older);
        let err = IndexDatabase::open(&database).unwrap_err().to_string();
        assert!(err.contains("run `rag-rat migrate`"), "{err}");

        // Migrating again restores a compatible, openable database.
        let migrated = IndexDatabase::migrate(&database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        IndexDatabase::open(&database).unwrap();

        fs::remove_dir_all(root).unwrap();
    }
3651
    /// Migrating a hand-built database whose `edges` table lacks the name and span
    /// columns must add those columns and create the two name indexes.
    ///
    /// NOTE(review): `table_count` is defined outside this view; it is used here with
    /// index names, so it presumably counts schema objects of any type — confirm.
    #[test]
    fn migrate_adds_edge_name_columns_before_indexing_them() {
        let root = unique_temp_root();
        // Best-effort cleanup of leftovers from an earlier run.
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join(".rag-rat")).unwrap();
        let database = root.join(".rag-rat/index.sqlite");
        let conn = rusqlite::Connection::open(&database).unwrap();
        // Minimal tables without a `schema_version` table and without the edge columns
        // asserted below.
        conn.execute_batch(
            "
            CREATE TABLE files(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                path TEXT NOT NULL UNIQUE,
                language TEXT NOT NULL,
                kind TEXT NOT NULL,
                sha256 TEXT NOT NULL,
                modified_at_ms INTEGER NOT NULL,
                generated INTEGER NOT NULL DEFAULT 0,
                indexed_at_ms INTEGER NOT NULL
            );
            CREATE TABLE chunks(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                chunk_kind TEXT NOT NULL,
                symbol_path TEXT,
                start_byte INTEGER NOT NULL,
                end_byte INTEGER NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                text TEXT NOT NULL,
                text_hash TEXT NOT NULL
            );
            CREATE TABLE symbols(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                file_id INTEGER NOT NULL,
                language TEXT NOT NULL,
                name TEXT NOT NULL,
                qualified_name TEXT NOT NULL,
                kind TEXT NOT NULL,
                start_byte INTEGER NOT NULL,
                end_byte INTEGER NOT NULL,
                signature TEXT,
                docs TEXT
            );
            CREATE TABLE edges(
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                from_symbol_id INTEGER,
                to_symbol_id INTEGER,
                edge_kind TEXT NOT NULL,
                confidence TEXT NOT NULL
            );
            ",
        )
        .unwrap();
        drop(conn);

        let migrated = IndexDatabase::migrate(&database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        let db = IndexDatabase::open(&database).unwrap();
        let columns = table_columns(&db, "edges");
        assert!(columns.contains(&"from_name".to_string()));
        assert!(columns.contains(&"to_name".to_string()));
        assert!(columns.contains(&"source_start_line".to_string()));
        assert!(columns.contains(&"source_end_line".to_string()));
        assert!(columns.contains(&"source_start_byte".to_string()));
        assert!(columns.contains(&"source_end_byte".to_string()));
        assert!(columns.contains(&"target_start_line".to_string()));
        assert!(columns.contains(&"target_end_line".to_string()));
        // The indexes over the new name columns exist as well.
        assert_eq!(table_count(&db, "idx_edges_from_name"), 1);
        assert_eq!(table_count(&db, "idx_edges_to_name"), 1);

        fs::remove_dir_all(root).unwrap();
    }
3724
    /// Re-running a migration (after forgetting that `010_symbol_facts` was applied)
    /// must leave the synced GitHub rows and their full-text index untouched.
    ///
    /// NOTE(review): `markdown_config`, `row_count` and `MockGitHubClient` are defined
    /// outside this view; the expected counts depend on what the mock client returns.
    #[test]
    fn migrate_preserves_github_papertrail_cache() {
        let (root, config) =
            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
        let db = IndexDatabase::rebuild(&config).unwrap();
        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
            .unwrap();
        // Baseline row counts right after the sync.
        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        // Remove one migration record so that `migrate` has work to do.
        db.storage
            .connection()
            .execute("DELETE FROM schema_version WHERE id = ?1", ["010_symbol_facts"])
            .unwrap();
        drop(db);

        let migrated = IndexDatabase::migrate(&config.database).unwrap();
        assert_eq!(migrated.state, schema::SchemaState::Compatible);
        let db = IndexDatabase::open(&config.database).unwrap();
        // Same counts as before the migration.
        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        // The cached issue is still searchable.
        let hits = db.github_issue_search("sqlite", 10).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].number, 42);

        fs::remove_dir_all(root).unwrap();
    }
3761
    /// A second full rebuild of the same configuration must keep the synced GitHub rows,
    /// the ref sync bookkeeping and the full-text index.
    ///
    /// NOTE(review): `markdown_config`, `row_count` and `MockGitHubClient` are defined
    /// outside this view; the expected counts depend on what the mock client returns.
    #[test]
    fn full_rebuild_preserves_github_papertrail_cache() {
        let (root, config) =
            markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
        let db = IndexDatabase::rebuild(&config).unwrap();
        github::sync_from_refs(db.storage.connection(), &root, Some(&MockGitHubClient), false)
            .unwrap();
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        // Close the first handle before rebuilding.
        drop(db);

        let db = IndexDatabase::rebuild(&config).unwrap();

        assert_eq!(row_count(&db, "github_refs"), 1);
        assert_eq!(row_count(&db, "github_issues"), 1);
        assert_eq!(row_count(&db, "github_comments"), 1);
        assert_eq!(row_count(&db, "github_pull_requests"), 1);
        assert_eq!(row_count(&db, "github_reviews"), 1);
        assert_eq!(row_count(&db, "github_review_comments"), 1);
        assert_eq!(row_count(&db, "github_ref_sync"), 1);
        assert_eq!(row_count(&db, "github_fts"), 5);
        // The cached issue is still searchable after the rebuild.
        let hits = db.github_issue_search("sqlite", 10).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].number, 42);

        fs::remove_dir_all(root).unwrap();
    }
3789
    /// A model installed before a full rebuild must still be reported as the installed,
    /// ready embedding model afterwards.
    #[test]
    fn full_rebuild_preserves_installed_model_manifest() {
        let (root, config) = markdown_config("alpha token with enough detail for embeddings\n");
        let db = IndexDatabase::rebuild(&config).unwrap();
        db.install_model(ai::HASH_MODEL_ID).unwrap();
        let before = db.local_ai_status().unwrap();
        assert_eq!(before.embedding.model_id, ai::HASH_MODEL_ID);
        assert!(before.embedding.installed);
        // Close the first handle before rebuilding.
        drop(db);

        let db = IndexDatabase::rebuild(&config).unwrap();

        let after = db.local_ai_status().unwrap();
        assert_eq!(after.embedding.model_id, ai::HASH_MODEL_ID);
        assert!(after.embedding.installed);
        assert_eq!(after.embedding.state, "Ready");

        fs::remove_dir_all(root).unwrap();
    }
3809
    /// Rows that belong to a different worktree id must survive a full rebuild of the
    /// current configuration.
    ///
    /// The test inserts a file, a chunk, a symbol and a full-text row tagged with
    /// `other-worktree` by hand, rebuilds, and checks that each of them is still there.
    #[test]
    fn full_rebuild_preserves_other_worktree_contexts() {
        let root = unique_temp_root();
        // Best-effort cleanup of leftovers from an earlier run.
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn current_context() {}\n").unwrap();
        let config = source_config(root.clone(), Language::Rust);
        let db = IndexDatabase::rebuild(&config).unwrap();
        // A file row owned by another worktree; its id links the rows inserted below.
        let other_file_id = db
            .storage
            .connection()
            .query_row(
                "
                INSERT INTO main.files(
                    path, language, kind, sha256, modified_at_ms, generated, indexed_at_ms,
                    indexed_revision, commit_sha, worktree_id
                )
                VALUES ('src/other.rs', 'rust', 'source', 'other-sha', 0, 0, 1, 'other-sha', '', 'other-worktree')
                RETURNING id
                ",
                [],
                |row| row.get::<_, i64>(0),
            )
            .unwrap();
        // A chunk that belongs to the foreign file.
        let other_chunk_id = db
            .storage
            .connection()
            .query_row(
                "
                INSERT INTO main.chunks(
                    file_id, chunk_kind, symbol_path, start_byte, end_byte, start_line, end_line,
                    text, text_hash, source_revision, anchor_version, normalized_hash,
                    start_boundary_hash, end_boundary_hash, start_context_hash, end_context_hash,
                    context_radius, embedding_policy, embedding_priority
                )
                VALUES (?1, 'symbol', 'other_context', 0, 12, 1, 1, 'other context', 'other-text',
                    'other-sha', 1, '', '', '', '', '', 2, 'Embed', 1)
                RETURNING id
                ",
                [other_file_id],
                |row| row.get::<_, i64>(0),
            )
            .unwrap();
        // A symbol that belongs to the foreign file.
        db.storage
            .connection()
            .execute(
                "
                INSERT INTO main.symbols(
                    file_id, language, name, qualified_name, kind, start_byte, end_byte, signature, docs
                )
                VALUES (?1, 'rust', 'other_context', 'other_context', 'function', 0, 12, NULL, NULL)
                ",
                [other_file_id],
            )
            .unwrap();
        // The full-text row for the foreign chunk, keyed by the chunk id.
        db.storage
            .connection()
            .execute(
                "INSERT INTO main.chunk_fts(rowid, text) VALUES (?1, 'other context')",
                [other_chunk_id],
            )
            .unwrap();
        drop(db);

        let db = IndexDatabase::rebuild(&config).unwrap();

        // Each foreign row must still be present after the rebuild.
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.files WHERE worktree_id = 'other-worktree'",
                    [],
                    |row| row.get::<_, i64>(0)
                )
                .unwrap(),
            1
        );
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.chunks WHERE file_id = ?1",
                    [other_file_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.symbols WHERE file_id = ?1",
                    [other_file_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );
        assert_eq!(
            db.storage
                .connection()
                .query_row(
                    "SELECT COUNT(*) FROM main.chunk_fts WHERE rowid = ?1",
                    [other_chunk_id],
                    |row| { row.get::<_, i64>(0) }
                )
                .unwrap(),
            1
        );

        fs::remove_dir_all(root).unwrap();
    }
3923
    /// Opening must be refused both for a database marked dirty and for one that records
    /// a schema version newer than this build, each with a distinct error message.
    #[test]
    fn compatible_open_refuses_dirty_and_newer_schema() {
        let root = unique_temp_root();
        // Best-effort cleanup of leftovers from an earlier run.
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join(".rag-rat")).unwrap();
        let database = root.join(".rag-rat/index.sqlite");
        let conn = rusqlite::Connection::open(&database).unwrap();
        // Phase 1: a version table that contains only the `__dirty__` marker row.
        conn.execute_batch(
            "
            CREATE TABLE schema_version(
                id TEXT PRIMARY KEY,
                applied_at_ms INTEGER NOT NULL,
                checksum TEXT NOT NULL,
                description TEXT NOT NULL
            );
            INSERT INTO schema_version(id, applied_at_ms, checksum, description)
            VALUES ('__dirty__', 1, '', 'partial migration in progress');
            ",
        )
        .unwrap();
        drop(conn);

        let dirty = IndexDatabase::migration_check(&database).unwrap();
        assert_eq!(dirty.state, schema::SchemaState::Dirty);
        let err = IndexDatabase::open(&database).unwrap_err().to_string();
        assert!(err.contains("dirty or partial"), "{err}");

        // Phase 2: replace the marker with a migration id this build does not know.
        let conn = rusqlite::Connection::open(&database).unwrap();
        conn.execute_batch(
            "
            DELETE FROM schema_version;
            INSERT INTO schema_version(id, applied_at_ms, checksum, description)
            VALUES ('999_future_schema', 1, 'sha256:future', 'future schema');
            ",
        )
        .unwrap();
        drop(conn);
        let newer = IndexDatabase::migration_check(&database).unwrap();
        assert_eq!(newer.state, schema::SchemaState::Newer);
        let err = IndexDatabase::open(&database).unwrap_err().to_string();
        assert!(err.contains("newer rag-rat"), "{err}");

        fs::remove_dir_all(root).unwrap();
    }
3968
    /// Discover mode must pick up a file added after the rebuild, drop a file removed
    /// after the rebuild, and do no per-file work when nothing has changed.
    #[test]
    fn discover_mode_indexes_new_files_and_removes_deleted_files() {
        let root = unique_temp_root();
        // Best-effort cleanup of leftovers from an earlier run.
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn old_symbol() {}\n").unwrap();
        let config = source_config(root.clone(), Language::Rust);
        let db = IndexDatabase::rebuild(&config).unwrap();
        assert_eq!(db.discovery_status(&config).unwrap().unindexed_source_files, 0);

        // Add one file and delete another behind the index's back.
        fs::write(root.join("src/new.rs"), "pub fn new_symbol() {}\n").unwrap();
        fs::remove_file(root.join("src/lib.rs")).unwrap();
        let drift = db.discovery_status(&config).unwrap();
        assert_eq!(drift.unindexed_source_files, 1);
        assert_eq!(drift.removed_indexed_files, 1);
        assert!(drift.warning.as_deref().unwrap().contains("rag-rat index --discover"));

        // Discover mode brings the index back in line with the disk.
        let db = IndexDatabase::index_discover(&config).unwrap();
        let fresh = db.discovery_status(&config).unwrap();
        assert_eq!(fresh.unindexed_source_files, 0);
        assert_eq!(fresh.removed_indexed_files, 0);
        assert!(fresh.warning.is_none());
        assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
        assert!(db.symbols("old_symbol", Some(Language::Rust), 10).unwrap().is_empty());

        // A second discover run with no changes finishes with zero files and emits no
        // per-file progress events.
        let mut events = Vec::new();
        let db = IndexDatabase::index_discover_with_progress(&config, |progress| {
            events.push(progress);
        })
        .unwrap();
        assert!(matches!(events.last(), Some(IndexProgress::Finished { files: 0 })));
        assert!(
            !events.iter().any(|event| matches!(
                event,
                IndexProgress::PreparingFile { .. } | IndexProgress::IndexingFile { .. }
            )),
            "no-op discover should not prepare or index files: {events:?}"
        );
        assert_eq!(db.symbols("new_symbol", Some(Language::Rust), 10).unwrap().len(), 1);

        fs::remove_dir_all(root).unwrap();
    }
4011
4012 #[cfg(unix)]
4013 #[test]
4014 fn indexing_skips_symlink_loops() {
4015 let root = unique_temp_root();
4016 let _ = fs::remove_dir_all(&root);
4017 fs::create_dir_all(root.join("src")).unwrap();
4018 fs::write(root.join("src/lib.rs"), "pub fn loop_safe_symbol() {}\n").unwrap();
4019 std::os::unix::fs::symlink(&root, root.join("src/loop")).unwrap();
4020
4021 let config = source_config(root.clone(), Language::Rust);
4022 let db = IndexDatabase::rebuild(&config).unwrap();
4023
4024 assert_eq!(db.symbols("loop_safe_symbol", Some(Language::Rust), 10).unwrap().len(), 1);
4025
4026 fs::remove_dir_all(root).unwrap();
4027 }
4028
4029 #[test]
4030 fn dirty_git_files_are_indexed_as_worktree_overlay() {
4031 let root = unique_temp_root();
4032 let _ = fs::remove_dir_all(&root);
4033 let docs = root.join("docs");
4034 fs::create_dir_all(&docs).unwrap();
4035 fs::write(docs.join("search.md"), "# Title\nbase token\n").unwrap();
4036 run_git(&root, &["init"]);
4037 run_git(&root, &["add", "."]);
4038 run_git(
4039 &root,
4040 &[
4041 "-c",
4042 "user.name=Rag Rat Test",
4043 "-c",
4044 "user.email=rag-rat@example.invalid",
4045 "commit",
4046 "-m",
4047 "initial",
4048 ],
4049 );
4050
4051 let config = markdown_config_for_root(root.clone());
4052 let db = IndexDatabase::rebuild(&config).unwrap();
4053 assert_eq!(db.search("base", 10, false).unwrap().len(), 1);
4054
4055 fs::write(docs.join("search.md"), "# Title\noverlay token\n").unwrap();
4056 let db = IndexDatabase::index_changed(&config).unwrap();
4057 let scopes = db
4058 .storage
4059 .connection()
4060 .prepare(
4061 "
4062 SELECT commit_sha != '', worktree_id != ''
4063 FROM main.files
4064 WHERE path = 'docs/search.md'
4065 ORDER BY commit_sha != '' DESC, worktree_id != '' DESC
4066 ",
4067 )
4068 .unwrap()
4069 .query_map([], |row| Ok((row.get::<_, bool>(0)?, row.get::<_, bool>(1)?)))
4070 .unwrap()
4071 .collect::<Result<Vec<_>, _>>()
4072 .unwrap();
4073
4074 assert_eq!(scopes, vec![(true, false), (false, true)]);
4075 assert!(db.search("base", 10, false).unwrap().is_empty());
4076 let overlay_hits = db.search("overlay", 10, false).unwrap();
4077 assert_eq!(overlay_hits.len(), 1);
4078 assert!(overlay_hits[0].summary.contains("overlay token"));
4079
4080 fs::remove_dir_all(root).unwrap();
4081 }
4082
4083 #[test]
4084 fn rebuild_populates_revision_metadata_and_fresh_fts_state() {
4085 let (root, config) = markdown_config("alpha token");
4086 let db = IndexDatabase::rebuild(&config).unwrap();
4087 let status = db.status(&config.database).unwrap();
4088
4089 assert!(!status.content_revision.is_empty());
4090 assert_eq!(status.fts_source_revision.as_deref(), Some(status.content_revision.as_str()));
4091 assert_eq!(
4092 db.meta("content_revision").unwrap().as_deref(),
4093 Some(status.content_revision.as_str())
4094 );
4095 assert!(!status.fts_dirty);
4096 assert!(status.fts_fresh);
4097 assert!(!status.git_history.available);
4098 assert_eq!(status.git_history.commit_count, 0);
4099 assert_eq!(status.local_ai.embedding.state, "MissingModel");
4100 assert_eq!(status.local_ai.fastembed.backend, "fastembed");
4101 assert_eq!(status.local_ai.fastembed.model, ai::FASTEMBED_DISPLAY_MODEL);
4102 assert_eq!(status.local_ai.fastembed.dim, ai::FASTEMBED_EMBEDDING_DIM);
4103 assert!(!status.local_ai.fastembed.cache.is_empty());
4104 assert_eq!(status.local_ai.fastembed.build_feature_enabled, cfg!(feature = "fastembed"));
4105 assert_eq!(status.local_ai.artifacts.total_chunks, 1);
4106 assert_eq!(
4107 status.local_ai.artifacts.eligible_chunks + status.local_ai.artifacts.skipped_chunks,
4108 status.local_ai.artifacts.total_chunks
4109 );
4110 assert_eq!(
4111 status.local_ai.fastembed.eligible_embeddings
4112 + status.local_ai.fastembed.skipped_embeddings,
4113 status.local_ai.artifacts.total_chunks
4114 );
4115 assert_eq!(indexed_revision_count(&db), 1);
4116 assert_eq!(chunk_source_revision_count(&db), 1);
4117
4118 fs::remove_dir_all(root).unwrap();
4119 }
4120
4121 #[cfg(not(feature = "fastembed"))]
4122 #[test]
4123 fn fastembed_missing_feature_reports_rebuild_command() {
4124 let (root, config) = markdown_config("alpha token\n");
4125 let db = IndexDatabase::rebuild(&config).unwrap();
4126
4127 let err = db.install_model(ai::FASTEMBED_MODEL_ID).unwrap_err();
4128 assert!(err.to_string().contains(ai::FASTEMBED_MISSING_FEATURE_MESSAGE));
4129
4130 let status = db.local_ai_status().unwrap();
4131 assert!(!status.fastembed.build_feature_enabled);
4132 assert_eq!(status.fastembed.status, "MissingRuntime");
4133 assert_eq!(
4134 status.fastembed.message.as_deref(),
4135 Some(ai::FASTEMBED_MISSING_FEATURE_MESSAGE)
4136 );
4137 assert_eq!(status.fastembed.next.as_deref(), Some("cargo install rag-rat"));
4138
4139 fs::remove_dir_all(root).unwrap();
4140 }
4141
    /// Walks the hash embedding model through its lifecycle on a one-chunk markdown index:
    /// reconcile reports `Blocked` until the model is installed, writes one embedding
    /// afterwards, is a no-op when nothing changed, and re-processes the chunk once its
    /// stored `source_text_hash` no longer matches.
    #[test]
    fn reconcile_requires_explicit_model_install_and_ignores_stale_artifacts() {
        let (root, config) = markdown_config(
            "alpha token\nsecond line with enough detail for the semantic embedding policy to keep this chunk\nthird line with runtime context\n",
        );
        let db = IndexDatabase::rebuild(&config).unwrap();
        let chunk_id = first_chunk_id(&db);

        // The hash model is listed after a rebuild, but not installed.
        let models = db.list_models().unwrap();
        let embedding = models.iter().find(|model| model.model_id == ai::HASH_MODEL_ID).unwrap();
        assert!(!embedding.installed);
        assert_eq!(embedding.status, "MissingModel");

        // Search already returns the chunk before any model is installed.
        let hits = db.search("alpha", 10, false).unwrap();
        assert_eq!(hits.len(), 1);
        assert!(hits[0].summary.contains("alpha token"));

        // Reconciling without an installed model does no work and reports "Blocked".
        let blocked = db.reconcile(Some(1), Some(8)).unwrap();
        assert_eq!(blocked.processed_chunks, 0);
        assert_eq!(blocked.embeddings_written, 0);
        assert_eq!(blocked.blocked_chunks, 0);
        assert_eq!(blocked.model_id, ai::HASH_MODEL_ID);
        assert_eq!(blocked.batch_size, 8);
        assert_eq!(blocked.status, "Blocked");

        let status = db.local_ai_status().unwrap();
        assert_eq!(status.embedding.state, "MissingModel");
        assert_eq!(status.embedding.blocked_artifacts, 0);

        // After an explicit install the plan shows one missing embedding, and reconcile
        // writes it.
        db.install_model(ai::HASH_MODEL_ID).unwrap();
        let plan = db.reconcile_plan().unwrap();
        assert_eq!(plan.embeddings.missing, 1);
        assert_eq!(plan.embeddings.current, 0);
        let current = db.reconcile(Some(1), Some(8)).unwrap();
        assert_eq!(current.embeddings_written, 1);
        assert_eq!(current.model_id, ai::HASH_MODEL_ID);
        assert_eq!(current.model_version, "hash-v1");
        assert_eq!(current.embedding_dim, ai::HASH_EMBEDDING_DIM);
        assert_eq!(current.status, "Current");
        assert_eq!(current.work_reasons.get("Missing"), Some(&1));
        // A second reconcile with nothing outstanding must not touch any chunk.
        let noop = db.reconcile(None, Some(8)).unwrap();
        assert_eq!(noop.processed_chunks, 0);
        assert_eq!(noop.embeddings_written, 0);
        let status = db.local_ai_status().unwrap();
        assert_eq!(status.embedding.state, "Ready");
        assert_eq!(status.embedding.current_artifacts, 1);
        // The stored blob length is checked against 4 bytes per dimension
        // (presumably f32 components — confirm against the embedding writer).
        let embedding_bytes: i64 = db
            .storage
            .connection()
            .query_row(
                "SELECT length(vector_blob) FROM chunk_embeddings WHERE chunk_id = ?1 AND status = 'Current'",
                [chunk_id],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(embedding_bytes, (ai::HASH_EMBEDDING_DIM * 4) as i64);

        let hits = db.search("alpha", 10, false).unwrap();
        assert!(hits[0].summary.contains("alpha token"));

        // With every chunk_fts row deleted the chunk must still be found; the variable
        // name suggests this hit is expected to come from the embedding vectors.
        db.storage.connection().execute("DELETE FROM chunk_fts", []).unwrap();
        let vector_hits = db.search("alpha", 10, false).unwrap();
        assert_eq!(vector_hits.len(), 1);
        assert_eq!(vector_hits[0].chunk_id, chunk_id);

        // Overwrite the stored source hash so the embedding no longer matches its chunk;
        // the plan should then count it as stale and reconcile should redo it.
        db.storage
            .connection()
            .execute(
                "UPDATE chunk_embeddings SET source_text_hash = 'old-hash' WHERE chunk_id = ?1",
                [chunk_id],
            )
            .unwrap();
        let plan = db.reconcile_plan().unwrap();
        assert_eq!(plan.embeddings.current, 0);
        assert_eq!(plan.embeddings.stale, 1);
        let refreshed = db.reconcile(None, Some(8)).unwrap();
        assert_eq!(refreshed.processed_chunks, 1);
        assert_eq!(refreshed.work_reasons.get("SourceChanged"), Some(&1));
        assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 1);
        let stale_embedding_hits = db.search("alpha", 10, false).unwrap();
        assert_eq!(stale_embedding_hits.len(), 1);

        fs::remove_dir_all(root).unwrap();
    }
4226
4227 #[cfg(feature = "fastembed")]
4228 #[test]
4229 fn cached_fastembed_model_recovers_ready_state() {
4230 let (root, config) = markdown_config("alpha token\n");
4231 let db = IndexDatabase::rebuild(&config).unwrap();
4232 let cache_dir = root.join("models");
4233 let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4234 let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4235 fs::create_dir_all(repo.join("refs")).unwrap();
4236 fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4237 fs::write(repo.join("refs").join("main"), revision).unwrap();
4238
4239 ai::recover_cached_fastembed_model_at(db.storage.connection(), &cache_dir).unwrap();
4240
4241 let models = db.list_models().unwrap();
4242 let fastembed =
4243 models.iter().find(|model| model.model_id == ai::FASTEMBED_MODEL_ID).unwrap();
4244 assert!(fastembed.installed);
4245 assert_eq!(fastembed.status, "Ready");
4246 let status = db.local_ai_status().unwrap();
4247 assert_eq!(status.fastembed.status, "Ready");
4248 assert!(status.fastembed.active);
4249
4250 fs::remove_dir_all(root).unwrap();
4251 }
4252
4253 #[cfg(feature = "fastembed")]
4254 #[test]
4255 fn compatible_migrate_recovers_cached_fastembed_model() {
4256 let (root, config) = markdown_config("alpha token\n");
4257 let db = IndexDatabase::rebuild(&config).unwrap();
4258 let cache_dir = root.join("models");
4259 let revision = "5f1b8cd78bc4fb444dd171e59b18f3a3af89a079";
4260 let repo = cache_dir.join("models--Qdrant--all-MiniLM-L6-v2-onnx");
4261 fs::create_dir_all(repo.join("refs")).unwrap();
4262 fs::create_dir_all(repo.join("snapshots").join(revision)).unwrap();
4263 fs::write(repo.join("refs").join("main"), revision).unwrap();
4264 db.storage
4265 .connection()
4266 .execute(
4267 "UPDATE ai_models
4268 SET installed = 0, status = 'MissingModel', installed_at_ms = NULL
4269 WHERE model_id = ?1",
4270 [ai::FASTEMBED_MODEL_ID],
4271 )
4272 .unwrap();
4273
4274 IndexDatabase::migrate_with_fastembed_cache(&config.database, Some(&cache_dir)).unwrap();
4275
4276 let db = IndexDatabase::open(&config.database).unwrap();
4277 let status = db.local_ai_status().unwrap();
4278 assert_eq!(status.fastembed.status, "Ready");
4279 assert!(status.fastembed.active);
4280
4281 fs::remove_dir_all(root).unwrap();
4282 }
4283
4284 #[test]
4285 fn reconcile_without_limit_processes_all_chunks() {
4286 let (root, config) = markdown_config(
4287 "# One\nalpha token with enough surrounding detail for embedding eligibility and useful semantic context\n\n# Two\nbeta token with enough surrounding detail for embedding eligibility and useful semantic context\n",
4288 );
4289 let db = IndexDatabase::rebuild(&config).unwrap();
4290 db.install_model(ai::HASH_MODEL_ID).unwrap();
4291
4292 let report = db.reconcile(None, Some(2)).unwrap();
4293
4294 assert_eq!(report.processed_chunks, 2);
4295 assert_eq!(report.embeddings_written, 2);
4296 assert_eq!(report.batch_size, 2);
4297 assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 2);
4298 let second = db.reconcile(None, Some(2)).unwrap();
4299 assert_eq!(second.processed_chunks, 0);
4300
4301 fs::remove_dir_all(root).unwrap();
4302 }
4303
4304 #[test]
4305 fn reconcile_treats_c_chunks_as_embedding_eligible() {
4306 let root = unique_temp_root();
4307 let _ = fs::remove_dir_all(&root);
4308 fs::create_dir_all(root.join("src")).unwrap();
4309 fs::write(
4310 root.join("src/main.c"),
4311 r#"
4312static int read_sensor_value(int baseline)
4313{
4314 int adjusted = baseline + 42;
4315 return adjusted;
4316}
4317
4318int main(void)
4319{
4320 int sample = read_sensor_value(7);
4321 return sample == 49 ? 0 : 1;
4322}
4323"#,
4324 )
4325 .unwrap();
4326 let config = source_config(root.clone(), Language::C);
4327 let db = IndexDatabase::rebuild(&config).unwrap();
4328 db.install_model(ai::HASH_MODEL_ID).unwrap();
4329
4330 let plan = db.reconcile_plan().unwrap();
4331
4332 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipLanguageUnsupported"), None);
4333 assert!(plan.embeddings.missing > 0, "plan: {:?}", plan.embeddings);
4334
4335 let report = db.reconcile(None, Some(8)).unwrap();
4336 assert!(report.embeddings_written > 0, "report: {report:?}");
4337
4338 fs::remove_dir_all(root).unwrap();
4339 }
4340
4341 #[test]
4342 fn reconcile_policy_skips_tiny_chunks_before_embedding() {
4343 let (root, config) = markdown_config("tiny\n");
4344 let db = IndexDatabase::rebuild(&config).unwrap();
4345 db.install_model(ai::HASH_MODEL_ID).unwrap();
4346
4347 let plan = db.reconcile_plan().unwrap();
4348 assert_eq!(plan.embeddings.missing, 0);
4349 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4350
4351 let report = db.reconcile(None, Some(8)).unwrap();
4352 assert_eq!(report.embeddings_written, 0);
4353 assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4354 assert_eq!(db.current_embedding_count(ai::HASH_MODEL_ID).unwrap(), 0);
4355
4356 fs::remove_dir_all(root).unwrap();
4357 }
4358
4359 #[test]
4360 fn reconcile_plan_reports_policy_skips_for_fastembed_model() {
4361 let (root, config) = markdown_config("tiny\n");
4362 let db = IndexDatabase::rebuild(&config).unwrap();
4363 db.storage
4364 .connection()
4365 .execute(
4366 "UPDATE ai_models
4367 SET installed = 1, disabled = 0, status = 'Ready', embedding_dim = ?2
4368 WHERE model_id = ?1",
4369 params![
4370 ai::FASTEMBED_MODEL_ID,
4371 i64::try_from(ai::FASTEMBED_EMBEDDING_DIM).unwrap()
4372 ],
4373 )
4374 .unwrap();
4375 db.storage
4376 .connection()
4377 .execute(
4378 "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4379 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4380 [ai::FASTEMBED_MODEL_ID],
4381 )
4382 .unwrap();
4383
4384 let plan = db.reconcile_plan().unwrap();
4385
4386 assert_eq!(plan.embeddings.model_id, ai::FASTEMBED_MODEL_ID);
4387 assert_eq!(plan.embeddings.missing, 0);
4388 assert_eq!(plan.embeddings.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4389
4390 fs::remove_dir_all(root).unwrap();
4391 }
4392
4393 #[cfg(not(feature = "fastembed"))]
4394 #[test]
4395 fn blocked_fastembed_reconcile_still_reports_policy_skips() {
4396 let (root, config) = markdown_config("tiny\n");
4397 let db = IndexDatabase::rebuild(&config).unwrap();
4398 db.storage
4399 .connection()
4400 .execute(
4401 "INSERT INTO index_meta(key, value) VALUES ('active_embedding_model', ?1)
4402 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
4403 [ai::FASTEMBED_MODEL_ID],
4404 )
4405 .unwrap();
4406
4407 let report = db.reconcile(None, Some(8)).unwrap();
4408
4409 assert_eq!(report.status, "Blocked");
4410 assert_eq!(report.skipped_by_policy.get("SkipTooSmall"), Some(&1));
4411
4412 fs::remove_dir_all(root).unwrap();
4413 }
4414
4415 #[test]
4416 fn search_explain_reports_weighted_score_components() {
4417 let (root, config) = markdown_config(
4418 "alpha runtime shutdown\nsecond line with enough detail for embedding eligibility and semantic vector scoring\nthird line\n",
4419 );
4420 let db = IndexDatabase::rebuild(&config).unwrap();
4421 db.install_model(ai::HASH_MODEL_ID).unwrap();
4422 db.reconcile(None, Some(8)).unwrap();
4423
4424 let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4425
4426 assert_eq!(hits.len(), 1);
4427 let components = hits[0].score_components.as_ref().unwrap();
4428 let component_sum = components.bm25
4429 + components.vector
4430 + components.symbol
4431 + components.graph
4432 + components.git
4433 + components.github;
4434 assert!((hits[0].score - component_sum).abs() < 0.000_001);
4435 assert!(components.bm25 > 0.0);
4436 assert!(components.vector > 0.0);
4437 assert!(components.vector_note.is_none());
4438 assert!(components.bm25 <= 0.45);
4439 assert!(components.vector <= 0.35);
4440 assert!(components.symbol <= 0.10);
4441 assert!(components.graph <= 0.05);
4442 assert!(components.git <= 0.03);
4443 assert!(components.github <= 0.02);
4444 assert!(db.search("runtime shutdown", 10, false).unwrap()[0].score_components.is_none());
4445
4446 fs::remove_dir_all(root).unwrap();
4447 }
4448
4449 #[test]
4450 fn search_explain_labels_missing_vector_runtime() {
4451 let (root, config) = markdown_config(
4452 "alpha runtime shutdown\nsecond line with enough detail for lexical search without embeddings\nthird line\n",
4453 );
4454 let db = IndexDatabase::rebuild(&config).unwrap();
4455
4456 let hits = db.search_explain("runtime shutdown", 10, false).unwrap();
4457
4458 assert_eq!(hits.len(), 1);
4459 let components = hits[0].score_components.as_ref().unwrap();
4460 assert!(components.bm25 > 0.0);
4461 assert_eq!(components.vector, 0.0);
4462 assert_eq!(
4463 components.vector_note.as_deref(),
4464 Some("vector search unavailable: no current embedding model")
4465 );
4466
4467 fs::remove_dir_all(root).unwrap();
4468 }
4469
    /// Builds a two-commit git repository with a markdown and a Rust target, rebuilds the
    /// index, and checks the git-history surface: status counters, commit search, path and
    /// symbol history, impact surface, query-to-commit evidence, and chunk blame.
    #[test]
    fn git_history_indexes_commits_paths_queries_and_blame() {
        let root = unique_temp_root();
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("docs")).unwrap();
        fs::create_dir_all(root.join("src")).unwrap();
        // Identity is written to the repository config so the later commits need no `-c`.
        run_git(&root, &["init"]);
        run_git(&root, &["config", "user.name", "Rag Rat"]);
        run_git(&root, &["config", "user.email", "rag@example.com"]);

        // First commit: adds both files.
        fs::write(root.join("docs/search.md"), "# Title\nalpha token\n").unwrap();
        fs::write(root.join("src/lib.rs"), "pub fn tracked_symbol() {}\n").unwrap();
        run_git(&root, &["add", "."]);
        run_git(&root, &["commit", "-m", "Add alpha docs"]);

        // Second commit: rewrites only the markdown file.
        fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();
        run_git(&root, &["add", "."]);
        run_git(&root, &["commit", "-m", "Refresh beta docs"]);

        // One docs target and one source target over the same root.
        let config = Config {
            root: root.clone(),
            database: root.join(".rag-rat/index.sqlite"),
            targets: vec![
                ResolvedTarget {
                    name: "markdown".to_string(),
                    language: Language::Markdown,
                    directories: vec![PathBuf::from("docs")],
                    include: vec!["**/*.md".to_string()],
                    exclude: Vec::new(),
                    kind: TargetKind::Docs,
                },
                ResolvedTarget {
                    name: "rust".to_string(),
                    language: Language::Rust,
                    directories: vec![PathBuf::from("src")],
                    include: vec!["**/*.rs".to_string()],
                    exclude: Vec::new(),
                    kind: TargetKind::Source,
                },
            ],
            local_ai: Default::default(),
        };
        let db = IndexDatabase::rebuild(&config).unwrap();
        let status = db.status(&config.database).unwrap();
        // Two commits; three file changes (two files in the first commit, one in the second).
        assert!(status.git_history.available);
        assert!(status.git_history.head.is_some());
        assert_eq!(status.git_history.indexed_head, status.git_history.head);
        assert_eq!(status.git_history.commit_count, 2);
        assert_eq!(status.git_history.file_change_count, 3);

        // Only the second commit's subject mentions "beta".
        let commit_hits = db.commit_search("beta", 10).unwrap();
        assert_eq!(commit_hits.len(), 1);
        assert_eq!(commit_hits[0].subject, "Refresh beta docs");
        assert_eq!(commit_hits[0].evidence_kind, "historical");
        assert!(commit_hits[0].score > 0.0);

        // The markdown file was touched by both commits.
        let path_history = db.git_history_for_path("docs/search.md", 10).unwrap();
        assert_eq!(path_history.len(), 2);
        assert!(path_history.iter().all(|item| item.evidence_kind == "historical"));

        // The Rust file, and therefore the symbol, was touched by the first commit only.
        let symbol_history =
            db.git_history_for_symbol("tracked_symbol", Some(Language::Rust), 10).unwrap();
        assert_eq!(symbol_history.len(), 1);
        assert_eq!(symbol_history[0].path, "src/lib.rs");
        assert_eq!(symbol_history[0].evidence_kind, "historical");
        // The impact surface must contain both a structural and a historical entry.
        let impact = db.impact_surface("tracked_symbol", 10).unwrap();
        assert!(impact.iter().any(|item| {
            item.category == "Direct structural impact" && item.reason == "exact_symbol_definition"
        }));
        assert!(impact.iter().any(|item| {
            item.category == "Historical/papertrail evidence"
                && item.reason == "git_commit_touched_file"
        }));

        // The "beta" commit must be backed by both message and file-change evidence.
        let query_commits = db.commits_touching_query("beta", 10).unwrap();
        let beta_commit =
            query_commits.iter().find(|hit| hit.subject == "Refresh beta docs").unwrap();
        assert!(beta_commit.evidence.iter().any(|value| value == "commit_message"));
        assert!(beta_commit.evidence.iter().any(|value| value == "file_change"));
        assert_eq!(beta_commit.evidence_kind, "historical");

        // NOTE(review): the hash below implies `first_chunk_id` returns the markdown chunk
        // here — confirm against the helper's ordering.
        let chunk_id = first_chunk_id(&db);
        let blame = db.git_blame_chunk(chunk_id).unwrap().unwrap();
        assert_eq!(blame.source_text_hash, hex_sha256("# Title\nbeta token\n".as_bytes()));
        assert_eq!(blame.line_count, 2);
        assert_eq!(blame.commit_counts.values().sum::<i64>(), 2);
        assert!(blame.dominant_commit_lines >= 1);
        assert!(blame.dominant_commit.is_some());
        assert_eq!(blame.evidence_kind, "historical");
        // A second lookup for the same chunk must report the same source hash.
        let cached = db.git_blame_chunk(chunk_id).unwrap().unwrap();
        assert_eq!(cached.source_text_hash, blame.source_text_hash);

        fs::remove_dir_all(root).unwrap();
    }
4564
4565 #[test]
4566 fn indexes_rust_graph_edges_from_tree_sitter() {
4567 let root = unique_temp_root();
4568 let _ = fs::remove_dir_all(&root);
4569 fs::create_dir_all(root.join("src")).unwrap();
4570 fs::write(
4571 root.join("src/lib.rs"),
4572 r#"
4573use crate::worker::Worker;
4574mod worker;
4575
4576trait Service {
4577 fn serve(&self);
4578}
4579
4580struct Worker;
4581
4582impl Service for Worker {
4583 fn serve(&self) {
4584 helper();
4585 }
4586}
4587
4588fn helper() {}
4589
4590fn caller() {
4591 helper();
4592 Worker.serve();
4593}
4594"#,
4595 )
4596 .unwrap();
4597 let config = source_config(root.clone(), Language::Rust);
4598 let db = IndexDatabase::rebuild(&config).unwrap();
4599
4600 assert_edge(&db, "caller", "helper", "calls_name", "Syntactic");
4601 assert_edge(&db, "Worker", "Service", "implements", "Syntactic");
4602 assert_edge(&db, "src/lib.rs", "worker", "imports", "Syntactic");
4603 let callers = db.find_callers("helper", 10).unwrap();
4604 assert!(
4605 callers.iter().any(|edge| {
4606 edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
4607 && edge.edge_kind == "calls_name"
4608 }),
4609 "helper callers: {callers:?}"
4610 );
4611
4612 fs::remove_dir_all(root).unwrap();
4613 }
4614
4615 #[test]
4616 fn ffi_surface_labels_exported_impl_members_separately() {
4617 let root = unique_temp_root();
4618 let _ = fs::remove_dir_all(&root);
4619 fs::create_dir_all(root.join("src")).unwrap();
4620 fs::write(
4621 root.join("src/lib.rs"),
4622 r#"
4623pub struct PhraseRepo;
4624
4625#[uniffi::export]
4626impl PhraseRepo {
4627 pub fn children(&self) {}
4628 pub fn journal(&self) {}
4629}
4630
4631#[cfg_attr(not(target_arch = "wasm32"), uniffi::export(async_runtime = "tokio"))]
4632impl Runtime {
4633 pub fn route_search_query(&self) {}
4634}
4635
4636pub struct Runtime;
4637
4638/// Not #[uniffi::export]: this is an internal helper.
4639pub fn internal_helper() {}
4640
4641#[cfg_attr(target_arch = "wasm32", ::uniffi::export)]
4642pub fn exported_fn() {}
4643"#,
4644 )
4645 .unwrap();
4646 let config = source_config(root.clone(), Language::Rust);
4647 let db = IndexDatabase::rebuild(&config).unwrap();
4648
4649 let surface = db.ffi_surface(20).unwrap();
4650 assert!(
4651 surface.iter().any(|item| {
4652 item.reason == "rust_uniffi_export"
4653 && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("exported_fn"))
4654 }),
4655 "direct export should remain direct: {surface:?}"
4656 );
4657 assert!(
4658 surface.iter().any(|item| item.reason == "rust_uniffi_exported_impl"),
4659 "exported impl/type surface should be explicit: {surface:?}"
4660 );
4661 assert!(
4662 surface.iter().any(|item| {
4663 item.reason == "rust_uniffi_impl_member"
4664 && item
4665 .symbol
4666 .as_deref()
4667 .is_some_and(|symbol| symbol.ends_with("route_search_query"))
4668 }),
4669 "cfg_attr exported impl member should be labeled separately: {surface:?}"
4670 );
4671 assert!(
4672 surface.iter().any(|item| {
4673 item.reason == "rust_uniffi_impl_member"
4674 && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("children"))
4675 }),
4676 "impl member should be labeled separately: {surface:?}"
4677 );
4678 assert!(
4679 !surface.iter().any(|item| {
4680 item.reason == "rust_uniffi_export"
4681 && item.symbol.as_deref().is_some_and(|symbol| {
4682 symbol.ends_with("children") || symbol.ends_with("journal")
4683 })
4684 }),
4685 "impl members must not be reported as direct exports: {surface:?}"
4686 );
4687 assert!(
4688 !surface.iter().any(|item| {
4689 item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("internal_helper"))
4690 }),
4691 "comment-only UniFFI mentions must not create FFI surface rows: {surface:?}"
4692 );
4693
4694 fs::remove_dir_all(root).unwrap();
4695 }
4696
4697 #[test]
4698 fn search_and_read_chunk_attach_bounded_graph_evidence() {
4699 let root = unique_temp_root();
4700 let _ = fs::remove_dir_all(&root);
4701 fs::create_dir_all(root.join("src")).unwrap();
4702 fs::write(
4703 root.join("src/lib.rs"),
4704 "pub fn helper() {}\n\npub fn caller() {\n helper();\n}\n",
4705 )
4706 .unwrap();
4707 let config = source_config(root.clone(), Language::Rust);
4708 let db = IndexDatabase::rebuild(&config).unwrap();
4709
4710 let hits = db.search("helper caller", 10, false).unwrap();
4711 let helper_hit = hits
4712 .iter()
4713 .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with("helper")))
4714 .expect("helper search hit");
4715 let helper_graph = helper_hit.graph.as_ref().expect("helper graph evidence");
4716 assert_eq!(helper_graph.caller_count, 1);
4717 assert!(helper_graph.top_callers.iter().any(|caller| {
4718 caller.symbol_path.ends_with("caller")
4719 && caller.callsite.line == 4
4720 && caller.callsite.span == [4, 4]
4721 && caller.confidence == "syntactic"
4722 }));
4723 assert!(helper_graph.callers.is_empty(), "search keeps graph compact");
4724
4725 let caller_hit = hits
4726 .iter()
4727 .find(|hit| hit.symbol_path.as_deref().is_some_and(|path| path.ends_with("caller")))
4728 .expect("caller search hit");
4729 let caller_graph = caller_hit.graph.as_ref().expect("caller graph evidence");
4730 assert!(caller_graph.top_callees.iter().any(|callee| {
4731 callee.target == "helper"
4732 && callee.callsite.line == 4
4733 && callee.callsite.span == [4, 4]
4734 && callee.confidence == "syntactic"
4735 }));
4736
4737 let chunk = db.read_chunk(caller_hit.chunk_id).unwrap().expect("caller chunk");
4738 let full_graph = chunk.graph.as_ref().expect("full read_chunk graph");
4739 assert!(full_graph.symbol.as_ref().is_some_and(|symbol| symbol.name == "caller"));
4740 assert!(
4741 full_graph
4742 .callees
4743 .iter()
4744 .any(|callee| callee.target == "helper" && callee.callsite.line == 4)
4745 );
4746 assert!(full_graph.notes.iter().any(|note| note.contains("tree-sitter/syntactic")));
4747
4748 fs::remove_dir_all(root).unwrap();
4749 }
4750
4751 #[test]
4752 fn graph_exact_mode_requires_verified_symbol_identity() {
4753 let root = unique_temp_root();
4754 let _ = fs::remove_dir_all(&root);
4755 fs::create_dir_all(root.join("src")).unwrap();
4756 fs::write(
4757 root.join("src/lib.rs"),
4758 "pub fn helper() {}\n\npub fn caller() {\n helper();\n}\n",
4759 )
4760 .unwrap();
4761 let config = source_config(root.clone(), Language::Rust);
4762 let db = IndexDatabase::rebuild(&config).unwrap();
4763 let helper = db.symbols("helper", Some(Language::Rust), 10).unwrap().remove(0);
4764 let caller = db.symbols("caller", Some(Language::Rust), 10).unwrap().remove(0);
4765
4766 let bare_exact = db
4767 .find_callers_with_options(
4768 "helper",
4769 10,
4770 &crate::query::graph::GraphTraversalOptions {
4771 resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4772 ..Default::default()
4773 },
4774 )
4775 .unwrap();
4776 assert!(bare_exact.is_empty(), "bare exact lookup should not fall back: {bare_exact:?}");
4777
4778 let exact_callers = db
4779 .find_callers_with_options(
4780 "helper",
4781 10,
4782 &crate::query::graph::GraphTraversalOptions {
4783 resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4784 symbol_id: Some(helper.symbol_id),
4785 ..Default::default()
4786 },
4787 )
4788 .unwrap();
4789 assert!(
4790 exact_callers.iter().any(|edge| {
4791 edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("caller"))
4792 && edge.verified_target_symbol
4793 }),
4794 "exact callers: {exact_callers:?}"
4795 );
4796 assert!(exact_callers.iter().all(|edge| edge.verified_target_symbol));
4797
4798 let exact_callees = db
4799 .trace_callees_with_options(
4800 "caller",
4801 10,
4802 &crate::query::graph::GraphTraversalOptions {
4803 resolution_mode: crate::query::graph::GraphResolutionMode::Exact,
4804 symbol_id: Some(caller.symbol_id),
4805 ..Default::default()
4806 },
4807 )
4808 .unwrap();
4809 assert!(
4810 exact_callees.iter().any(|edge| {
4811 edge.target.as_deref() == Some("helper") && edge.verified_target_symbol
4812 }),
4813 "exact callees: {exact_callees:?}"
4814 );
4815 assert!(exact_callees.iter().all(|edge| edge.verified_target_symbol));
4816
4817 fs::remove_dir_all(root).unwrap();
4818 }
4819
4820 #[test]
4821 fn symbol_lookup_ranks_type_definitions_before_impl_blocks() {
4822 let root = unique_temp_root();
4823 let _ = fs::remove_dir_all(&root);
4824 fs::create_dir_all(root.join("src")).unwrap();
4825 fs::write(
4826 root.join("src/lib.rs"),
4827 r#"
4828impl Database {
4829 pub fn open() -> Self {
4830 Database
4831 }
4832}
4833
4834pub struct Database;
4835"#,
4836 )
4837 .unwrap();
4838 let config = source_config(root.clone(), Language::Rust);
4839 let db = IndexDatabase::rebuild(&config).unwrap();
4840 let hits = db.symbols("Database", Some(Language::Rust), 10).unwrap();
4841 assert!(hits.len() >= 2, "fixture should expose both impl and struct symbols: {hits:?}");
4842 assert_eq!(hits[0].kind, "struct", "Database lookup should prefer type definition");
4843 assert!(
4844 hits.iter().any(|hit| hit.kind == "impl"),
4845 "impl Database should still be available after the struct: {hits:?}"
4846 );
4847
4848 fs::remove_dir_all(root).unwrap();
4849 }
4850
/// Indexes two `#[cfg]`-gated definitions of `spawn_blocking` plus one caller and checks
/// that exact-mode caller queries cover the whole logical symbol group.
///
/// The test verifies that the first candidate reports two logical variants grouped as
/// `cfg_variant`, that an exact `find_callers_with_options` query pinned to the second
/// candidate's `symbol_id` still returns the `caller` edge, and that
/// `graph_traversal_report` for the first candidate lists both variants.
#[test]
fn logical_symbol_exact_mode_covers_duplicate_rust_variants() {
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};
    use crate::query::symbol::SymbolSelector;

    let root = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(
        root.join("src/lib.rs"),
        r#"
#[cfg(not(target_arch = "wasm32"))]
pub fn spawn_blocking() {}

#[cfg(target_arch = "wasm32")]
pub fn spawn_blocking() {}

pub fn caller() {
    spawn_blocking();
}
"#,
    )
    .unwrap();
    let config = source_config(root.clone(), Language::Rust);
    let db = IndexDatabase::rebuild(&config).unwrap();

    let selector = SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("spawn_blocking".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: true,
        limit: 10,
    };
    let lookup = db.symbol_candidates(&selector).unwrap();
    // Both candidates are indexed below; fail with a descriptive message instead of an
    // index-out-of-bounds panic when the lookup returns fewer than two.
    assert!(
        lookup.candidates.len() >= 2,
        "fixture should expose both cfg variants as candidates, found {}",
        lookup.candidates.len()
    );
    let first_candidate = &lookup.candidates[0];
    let second_candidate = &lookup.candidates[1];

    let logical_symbol_id = first_candidate.logical_symbol_id.expect("logical id");
    assert_eq!(first_candidate.logical_variant_count, Some(2));
    assert_eq!(first_candidate.logical_group_reason.as_deref(), Some("cfg_variant"));

    // Exact query pinned to the second variant's symbol id.
    let variant_options = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(second_candidate.symbol_id),
        ..Default::default()
    };
    let exact_variant_callers =
        db.find_callers_with_options("spawn_blocking", 10, &variant_options).unwrap();
    assert!(
        exact_variant_callers.iter().any(|edge| {
            edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
                && edge.target.as_deref() == Some("spawn_blocking")
                && edge.verified_target_symbol
        }),
        "symbol_id exact should include its logical cfg group: {exact_variant_callers:?}"
    );
    assert!(exact_variant_callers.iter().all(|edge| edge.verified_target_symbol));

    // Exact report for the first candidate.
    let logical_options = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(first_candidate.symbol_id),
        ..Default::default()
    };
    let exact_logical = db
        .graph_traversal_report("find_callers", first_candidate, true, 10, &logical_options)
        .unwrap();
    assert_eq!(exact_logical.query.logical_symbol_id, Some(logical_symbol_id));
    assert_eq!(
        exact_logical.logical_symbol.as_ref().map(|symbol| symbol.variant_count),
        Some(2)
    );
    assert_eq!(exact_logical.variants.len(), 2);
    assert!(exact_logical.results.iter().all(|edge| edge.verified_target_symbol));
    assert!(
        exact_logical.results.iter().any(|edge| {
            edge.from_symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
                && edge.target.as_deref() == Some("spawn_blocking")
        }),
        "logical exact callers: {exact_logical:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
4939
/// Rebuilds the index over the `graph-realworld/rust` fixture and checks a fixed list of
/// edges, then compares default `find_callers("serve")` (expected empty) with the same
/// query in fuzzy resolution mode (expected to contain a `calls_name` edge from `drive`).
#[test]
fn indexes_real_world_rust_graph_patterns() {
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};

    let fixture_root = fixture_temp_root("graph-realworld/rust");
    let cfg = source_config(fixture_root.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    // Argument rows for `assert_edge`, checked in this order.
    let expected_edges = [
        ("src/lib.rs", "worker", "imports", "Syntactic"),
        ("src/lib.rs", "Worker", "exports", "Syntactic"),
        ("entry", "new", "calls_name", "NameOnly"),
        ("entry", "Client", "references_type", "Syntactic"),
        ("drive", "serve", "calls_name", "NameOnly"),
        ("drive", "GenericRunner", "references_type", "Syntactic"),
        ("Worker", "Service", "implements", "Syntactic"),
        ("generic_call", "T", "references_type", "NameOnly"),
        ("entry", "generated_call", "uses_macro", "NameOnly"),
    ];
    for (first, second, third, fourth) in expected_edges {
        assert_edge(&index, first, second, third, fourth);
    }

    let syntactic_callers = index.find_callers("serve", 10).unwrap();
    assert!(
        syntactic_callers.is_empty(),
        "syntactic serve callers should avoid receiver/name fallback: {syntactic_callers:?}"
    );

    let fuzzy = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Fuzzy,
        ..Default::default()
    };
    let callers = index.find_callers_with_options("serve", 10, &fuzzy).unwrap();
    let drive_calls_serve = callers.iter().any(|candidate| {
        candidate.edge_kind == "calls_name"
            && candidate.edge_confidence == candidate.confidence
            && candidate.from_symbol.as_deref().is_some_and(|name| name.ends_with("drive"))
    });
    assert!(drive_calls_serve, "serve callers: {callers:?}");

    fs::remove_dir_all(fixture_root).unwrap();
}
4981
/// Writes a small TypeScript/TSX project (`helper.ts` and `App.tsx`), rebuilds the index,
/// and checks call, type-reference, import and export edges plus the callees of `callRun`.
#[test]
fn indexes_typescript_graph_edges_from_tree_sitter() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let helper_source = "export function helper() {}\nexport const Card = () => null;\n";
    fs::write(workspace.join("src/helper.ts"), helper_source).unwrap();

    let app_source = r#"
import { helper, Card } from "./helper";

export function run() {
    helper();
    return <Card />;
}

export const callRun = () => run();
"#;
    fs::write(workspace.join("src/App.tsx"), app_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::TypeScript);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    assert_edge(&index, "run", "helper", "calls_name", "Syntactic");
    assert_edge(&index, "run", "Card", "references_type", "Syntactic");
    assert_edge(&index, "src/App.tsx", "helper", "imports", "Syntactic");
    assert_edge(&index, "src/App.tsx", "run", "exports", "Syntactic");

    let callees = index.trace_callees("callRun", 10).unwrap();
    let reaches_run = callees.iter().any(|candidate| {
        candidate.to_symbol.as_deref().is_some_and(|name| name.ends_with("run"))
            && candidate.confidence == "Syntactic"
    });
    assert!(reaches_run, "callRun callees: {callees:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5024
/// Indexes a single C file where `runtime_open` calls `helper` and checks that the
/// corresponding `calls_name` edge is recorded with `Syntactic` confidence.
#[test]
fn indexes_c_graph_edges_from_tree_sitter() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let runtime_source = r#"
typedef struct Runtime Runtime;

struct Runtime {
    int state;
};

int helper(Runtime *runtime) {
    return runtime->state;
}

int runtime_open(Runtime *runtime) {
    return helper(runtime);
}
"#;
    fs::write(workspace.join("src/runtime.c"), runtime_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::C);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    assert_edge(&index, "runtime_open", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(workspace).unwrap();
}
5056
/// Indexes a C driver file that uses file-scope macro invocations (`DEVICE_API`,
/// `DEVICE_DT_INST_DEFINE`) and checks that searching for `DEVICE_API` returns a hit in
/// that file whose summary contains the macro name.
#[test]
fn indexes_c_file_scope_macro_regions_for_search() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("drivers/entropy")).unwrap();

    let driver_source = r#"
static int entropy_init(const struct device *dev)
{
    ARG_UNUSED(dev);
    return 0;
}

/* Entropy driver APIs structure */
static DEVICE_API(entropy, entropy_cryptoacc_trng_api) = {
    .get_entropy = entropy_cryptoacc_trng_get_entropy,
};

DEVICE_DT_INST_DEFINE(0, entropy_init, NULL, NULL, NULL,
    PRE_KERNEL_1, CONFIG_ENTROPY_INIT_PRIORITY,
    &entropy_cryptoacc_trng_api);
"#;
    fs::write(workspace.join("drivers/entropy/entropy.c"), driver_source).unwrap();

    // Hand-built configuration: a single C source target rooted at `drivers/entropy`.
    let c_target = ResolvedTarget {
        name: "c".to_string(),
        language: Language::C,
        directories: vec![PathBuf::from("drivers/entropy")],
        include: vec!["**/*.c".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let cfg = Config {
        root: workspace.clone(),
        database: workspace.join(".rag-rat/index.sqlite"),
        targets: vec![c_target],
        local_ai: Default::default(),
    };
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let hits = index.search("DEVICE_API", 5, false).unwrap();
    let macro_region_found = hits.iter().any(|candidate| {
        candidate.path == "drivers/entropy/entropy.c" && candidate.summary.contains("DEVICE_API")
    });
    assert!(macro_region_found, "DEVICE_API hits: {hits:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5107
/// Indexes a C++ file where the out-of-class definition `Runtime::open` calls `helper`
/// and checks that an `open` -> `helper` `calls_name` edge exists with `Syntactic` confidence.
#[test]
fn indexes_cpp_graph_edges_from_tree_sitter() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let runtime_source = r#"
namespace held {
class Runtime {
public:
    void open();
};

void helper() {}

void Runtime::open() {
    helper();
}
}
"#;
    fs::write(workspace.join("src/runtime.cpp"), runtime_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Cpp);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    assert_edge(&index, "open", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(workspace).unwrap();
}
5138
/// Rebuilds the index over the `graph-realworld/typescript` fixture, checks a fixed list
/// of edges, and verifies that `trace_callees_with_options` with references enabled
/// returns a `references_type` edge from `Shell` to `DefaultWidget`.
#[test]
fn indexes_real_world_typescript_graph_patterns() {
    use crate::query::graph::GraphTraversalOptions;

    let fixture_root = fixture_temp_root("graph-realworld/typescript");
    let cfg = source_config(fixture_root.clone(), Language::TypeScript);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    // Argument rows for `assert_edge`, checked in this order.
    let expected_edges = [
        ("src/lib.tsx", "DefaultWidget", "imports", "Syntactic"),
        ("src/lib.tsx", "WidgetNS", "imports", "NameOnly"),
        ("src/lib.tsx", "WidgetProps", "imports", "Syntactic"),
        ("src/lib.tsx", "ReExportedWidget", "exports", "NameOnly"),
        ("useWidget", "useMemo", "calls_name", "NameOnly"),
        ("useWidget", "DefaultWidget", "calls_name", "Syntactic"),
        ("Shell", "renderWidget", "calls_name", "NameOnly"),
        ("Shell", "WidgetNS", "references_type", "NameOnly"),
        ("Shell", "DefaultWidget", "references_type", "Syntactic"),
        ("DefaultWidget", "WidgetProps", "references_type", "Syntactic"),
    ];
    for (first, second, third, fourth) in expected_edges {
        assert_edge(&index, first, second, third, fourth);
    }

    let with_references = GraphTraversalOptions {
        include_references: true,
        edge_kinds: None,
        ..Default::default()
    };
    let callees = index.trace_callees_with_options("Shell", 10, &with_references).unwrap();
    let references_default_widget = callees.iter().any(|candidate| {
        candidate.edge_kind == "references_type"
            && candidate.edge_confidence == candidate.confidence
            && candidate.to_symbol.as_deref().is_some_and(|name| name.ends_with("DefaultWidget"))
    });
    assert!(references_default_widget, "Shell callees: {callees:?}");

    fs::remove_dir_all(fixture_root).unwrap();
}
5177
/// Indexes a crate that declares `mod format;` and also invokes `format!`, then reads the
/// `uses_macro` edge for `format` straight from the `edges` table and checks that it has
/// no target symbol id, `NameOnly` confidence, `unresolved` resolution, and evidence
/// mentioning `format!`.
#[test]
fn rust_macro_edges_do_not_resolve_to_same_named_modules() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
mod format;

fn execute_one() {
    let _value = format!("hello");
}
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();
    fs::write(workspace.join("src/format.rs"), "pub fn helper() {}\n").unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let sql = "
                SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
                FROM edges
                WHERE edge_kind = 'uses_macro'
                  AND to_name = 'format'
                ";
    let (edge_kind, to_name, to_symbol_id, confidence, resolution, evidence) = index
        .storage
        .connection()
        .query_row(sql, [], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, Option<i64>>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, Option<String>>(5)?,
            ))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_name, "format");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|value| value.contains("format!")));

    fs::remove_dir_all(workspace).unwrap();
}
5230
/// Builds an index, then rewrites it in place to look stale: `graph_index_version` is set
/// to `'old'` and the `format` edge is turned into a resolved `calls_name` edge. After the
/// handle is dropped and the database reopened with `IndexDatabase::open`, the test expects
/// to find the `format` edge as an unresolved `uses_macro` edge again.
#[test]
fn opening_old_graph_policy_rebuilds_stale_macro_edges() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
mod format;

fn execute_one() {
    let _value = format!("hello");
}
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();
    fs::write(workspace.join("src/format.rs"), "pub fn helper() {}\n").unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let original = IndexDatabase::rebuild(&cfg).unwrap();

    // Step 1: mark the stored graph version as outdated.
    let downgrade_version_sql =
        "UPDATE index_meta SET value = 'old' WHERE key = 'graph_index_version'";
    original.storage.connection().execute(downgrade_version_sql, []).unwrap();

    // Step 2: overwrite the macro edge with a resolved call edge.
    let corrupt_edge_sql = "
                UPDATE edges
                SET edge_kind = 'calls_name',
                    to_symbol_id = (SELECT id FROM symbols WHERE name = 'format' LIMIT 1),
                    confidence = 'Syntactic',
                    evidence = NULL,
                    resolution = 'syntactic'
                WHERE to_name = 'format'
                ";
    original.storage.connection().execute(corrupt_edge_sql, []).unwrap();

    // Step 3: release the first handle before reopening the same database file.
    drop(original);

    let reopened = IndexDatabase::open(&cfg.database).unwrap();
    let select_edge_sql = "
                SELECT edge_kind, to_symbol_id, confidence, resolution, evidence
                FROM edges
                WHERE to_name = 'format'
                  AND edge_kind = 'uses_macro'
                ";
    let (edge_kind, to_symbol_id, confidence, resolution, evidence) = reopened
        .storage
        .connection()
        .query_row(select_edge_sql, [], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, Option<i64>>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, Option<String>>(4)?,
            ))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|value| value.contains("format!")));

    fs::remove_dir_all(workspace).unwrap();
}
5302
/// Indexes a crate that defines `AlertsStore::new` and separately calls `Vec::new()` from
/// `caller`, then checks that the `new` call edge from `caller` keeps the qualified name
/// `Vec::new` and stays unresolved (`to_symbol_id` is NULL, confidence `NameOnly`).
#[test]
fn qualified_common_member_calls_do_not_resolve_by_short_name() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
pub struct AlertsStore;

impl AlertsStore {
    pub fn new() -> Self {
        Self
    }
}

pub fn caller() {
    let _items: Vec<String> = Vec::new();
}
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let sql = "
                SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution
                FROM edges
                WHERE from_name LIKE '%caller'
                  AND edge_kind = 'calls_name'
                  AND to_name = 'new'
                ";
    let (to_name, target_qualified_name, to_symbol_id, confidence, resolution) = index
        .storage
        .connection()
        .query_row(sql, [], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, Option<String>>(1)?,
                row.get::<_, Option<i64>>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, String>(4)?,
            ))
        })
        .unwrap();

    assert_eq!(to_name, "new");
    assert_eq!(target_qualified_name.as_deref(), Some("Vec::new"));
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");

    fs::remove_dir_all(workspace).unwrap();
}
5359
/// Indexes a mixed Rust + TypeScript tree where Rust invokes `json!` and TypeScript exports
/// a function named `json`, then checks that the Rust `uses_macro` edge for `json` has no
/// target symbol id and is recorded as `NameOnly` / `unresolved` with `json!` in its evidence.
#[test]
fn macro_edges_do_not_resolve_to_same_named_typescript_symbols() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let rust_source = r#"
fn rust_entry() {
    let _payload = json!({"ok": true});
}
"#;
    fs::write(workspace.join("src/lib.rs"), rust_source).unwrap();
    let ts_source = "export function json() { return {}; }\n";
    fs::write(workspace.join("src/preferences.ts"), ts_source).unwrap();

    // Start from the Rust source config and add a second target for the `.ts` files.
    let mut cfg = source_config(workspace.clone(), Language::Rust);
    let ts_target = ResolvedTarget {
        name: "typescript".to_string(),
        language: Language::TypeScript,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.ts".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    cfg.targets.push(ts_target);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let sql = "
                SELECT edge_kind, to_name, to_symbol_id, confidence, resolution, evidence
                FROM edges
                WHERE edge_kind = 'uses_macro'
                  AND to_name = 'json'
                ";
    let (edge_kind, to_name, to_symbol_id, confidence, resolution, evidence) = index
        .storage
        .connection()
        .query_row(sql, [], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, Option<i64>>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, Option<String>>(5)?,
            ))
        })
        .unwrap();

    assert_eq!(edge_kind, "uses_macro");
    assert_eq!(to_name, "json");
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert!(evidence.as_deref().is_some_and(|value| value.contains("json!")));

    fs::remove_dir_all(workspace).unwrap();
}
5419
/// Indexes a crate where `first` calls `crate::task_spawn::spawn_blocking()` and `second`
/// calls `task_spawn::spawn_blocking()`, then checks that `find_callers("spawn_blocking")`
/// returns both, with the edge from `first` resolved via `target_name_fallback`.
#[test]
fn qualified_crate_helper_callers_use_name_fallback() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
pub mod task_spawn {
    pub fn spawn_blocking() {}
}

pub fn first() {
    crate::task_spawn::spawn_blocking();
}

pub fn second() {
    task_spawn::spawn_blocking();
}
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let callers = index.find_callers("spawn_blocking", 10).unwrap();

    let first_via_fallback = callers.iter().any(|candidate| {
        candidate.from_symbol.as_deref().is_some_and(|name| name.ends_with("first"))
            && candidate.edge_kind == "calls_name"
            && candidate.resolution == "target_name_fallback"
    });
    assert!(first_via_fallback, "spawn_blocking callers: {callers:?}");

    let second_present = callers.iter().any(|candidate| {
        candidate.from_symbol.as_deref().is_some_and(|name| name.ends_with("second"))
            && candidate.edge_kind == "calls_name"
    });
    assert!(second_present, "spawn_blocking callers: {callers:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5464
/// Indexes a crate with several similarly named functions (`spawn`, `spawn_blocking`,
/// `spawn_blocking_handle`, `spawn_blocking_offload`) and checks that caller lookup for
/// `spawn_blocking` (both by short name and by `src/lib.rs::spawn_blocking`) returns the
/// `direct` caller and none of the callers or targets of the related names or of the
/// chained `map_err` call.
#[test]
fn caller_lookup_does_not_match_related_names_or_chain_evidence() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
pub mod runtime {
    pub mod task_spawn {
        pub fn spawn() {}
        pub fn spawn_blocking() -> JoinHandle {
            JoinHandle
        }
        pub fn spawn_blocking_handle() {}
        pub fn spawn_blocking_offload() -> JoinHandle {
            JoinHandle
        }
    }
}

pub struct JoinHandle;

impl JoinHandle {
    pub fn map_err(self) {}
}

pub fn direct() {
    crate::runtime::task_spawn::spawn_blocking();
}

pub fn related_handle() {
    crate::runtime::task_spawn::spawn_blocking_handle();
}

pub fn related_offload_chain() {
    crate::runtime::task_spawn::spawn_blocking_offload().map_err();
}

pub fn related_spawn_with_text() {
    crate::runtime::task_spawn::spawn();
}
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    // Lookup by short name.
    let callers = index.find_callers("spawn_blocking", 20).unwrap();
    let direct_found = callers.iter().any(|candidate| {
        candidate.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
            && candidate.target.as_deref() == Some("spawn_blocking")
            && candidate.edge_kind == "calls_name"
    });
    assert!(direct_found, "spawn_blocking callers: {callers:?}");
    let leaked = callers.iter().any(|candidate| {
        let from_related = candidate.from_symbol.as_deref().is_some_and(|name| {
            name.ends_with("related_handle")
                || name.ends_with("related_offload_chain")
                || name.ends_with("related_spawn_with_text")
        });
        let to_related = matches!(
            candidate.target.as_deref(),
            Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
        );
        from_related || to_related
    });
    assert!(!leaked, "caller lookup leaked related names or chain evidence: {callers:?}");

    // Lookup by file-qualified name.
    let qualified_callers = index.find_callers("src/lib.rs::spawn_blocking", 20).unwrap();
    let qualified_direct_found = qualified_callers.iter().any(|candidate| {
        candidate.from_symbol.as_deref().is_some_and(|name| name.ends_with("direct"))
            && candidate.target.as_deref() == Some("spawn_blocking")
            && candidate.edge_kind == "calls_name"
    });
    assert!(
        qualified_direct_found,
        "qualified spawn_blocking callers: {qualified_callers:?}"
    );
    let qualified_leaked = qualified_callers.iter().any(|candidate| {
        let from_related = candidate.from_symbol.as_deref().is_some_and(|name| {
            name.ends_with("related_handle")
                || name.ends_with("related_offload_chain")
                || name.ends_with("related_spawn_with_text")
        });
        let to_related = matches!(
            candidate.target.as_deref(),
            Some("spawn_blocking_handle" | "spawn_blocking_offload" | "spawn" | "map_err")
        );
        from_related || to_related
    });
    assert!(
        !qualified_leaked,
        "qualified caller lookup leaked related names or chain evidence: {qualified_callers:?}"
    );

    fs::remove_dir_all(workspace).unwrap();
}
5561
/// Generates a Rust file padded with 700 filler functions (asserted to exceed 10,000 bytes)
/// and checks that a function defined after the filler is still indexed: `caller` is found
/// as a symbol, its call to `spawn_blocking` appears with a callsite line above 700, and
/// `impact_surface` reports it as a direct caller.
#[test]
fn files_past_the_old_structural_cap_still_contribute_symbols_and_edges() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    // One filler function per line, so the real caller lands past line 700.
    let filler: String = (0..700).map(|idx| format!("pub fn filler_{idx}() {{}}\n")).collect();
    let lib_source = format!(
        r#"
pub mod task_spawn {{
    pub fn spawn_blocking() {{}}
}}

{filler}

pub fn caller() {{
    crate::task_spawn::spawn_blocking();
}}
"#
    );
    let lib_path = workspace.join("src/lib.rs");
    fs::write(&lib_path, lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    assert!(fs::metadata(workspace.join("src/lib.rs")).unwrap().len() > 10_000);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let symbols = index.symbols("caller", Some(Language::Rust), 10).unwrap();
    let caller_indexed = symbols.iter().any(|symbol| symbol.name == "caller");
    assert!(caller_indexed, "caller symbols: {symbols:?}");

    let callers = index.find_callers("spawn_blocking", 10).unwrap();
    let late_callsite_found = callers.iter().any(|candidate| {
        candidate.edge_kind == "calls_name"
            && candidate.target.as_deref() == Some("spawn_blocking")
            && candidate.callsite.as_ref().is_some_and(|callsite| callsite.line > 700)
    });
    assert!(late_callsite_found, "spawn_blocking callers: {callers:?}");

    let impact = index
        .impact_surface("callers of crate::task_spawn::spawn_blocking in src", 10)
        .unwrap();
    let direct_caller_reported = impact.iter().any(|item| {
        item.category == "Direct structural impact" && item.reason == "direct_caller"
    });
    assert!(direct_caller_reported, "impact: {impact:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5615
/// Runs `impact_surface` with a long free-text query and checks three things: the caller of
/// `spawn_blocking` is reported as a direct caller, no item is an `exact_symbol_definition`
/// whose symbol ends with `runtime`, and no item carries `references_type` evidence or has
/// the symbol `Send`.
#[test]
fn impact_surface_uses_high_signal_query_symbols_and_call_edges() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
pub mod runtime {
    pub fn unrelated_runtime_symbol() {}
}

pub mod task_spawn {
    pub fn spawn_blocking<F, T>(f: F) -> T
    where
        F: FnOnce() -> T + Send + 'static,
        T: Send + 'static,
    {
        f()
    }
}

pub fn caller() {
    crate::task_spawn::spawn_blocking(|| 1);
}
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let query = "change runtime task_spawn spawn_blocking wasm inline native blocking pool";
    let impact = index.impact_surface(query, 20).unwrap();

    let caller_reported = impact.iter().any(|item| {
        item.category == "Direct structural impact"
            && item.reason == "direct_caller"
            && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("caller"))
    });
    assert!(caller_reported, "spawn_blocking caller should be present: {impact:?}");

    let runtime_seeded = impact.iter().any(|item| {
        item.reason == "exact_symbol_definition"
            && item.symbol.as_deref().is_some_and(|symbol| symbol.ends_with("runtime"))
    });
    assert!(
        !runtime_seeded,
        "broad `runtime` token should not become an exact impact seed: {impact:?}"
    );

    let type_refs_leaked = impact.iter().any(|item| {
        item.evidence.iter().any(|evidence| evidence.contains("references_type"))
            || item.symbol.as_deref() == Some("Send")
    });
    assert!(
        !type_refs_leaked,
        "type references should not appear as direct impact: {impact:?}"
    );

    fs::remove_dir_all(workspace).unwrap();
}
5677
/// Indexes one Rust source file and two markdown documents, then checks the ordering of
/// `docs_for_selected_symbol` for `spawn_blocking`: the first hit must be the source file
/// itself, and if `docs/phrase-persistence.md` appears at all, `docs/task_spawn.md` must
/// appear before it.
#[test]
fn docs_for_symbol_prefers_local_source_context_before_broad_markdown() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src/runtime")).unwrap();
    fs::create_dir_all(workspace.join("docs")).unwrap();

    let rust_source = r#"
pub fn spawn_blocking<F, T>(f: F) -> T
where
    F: FnOnce() -> T + Send + 'static,
    T: Send + 'static,
{
    f()
}
"#;
    fs::write(workspace.join("src/runtime/task_spawn.rs"), rust_source).unwrap();

    let unrelated_doc = "# Phrase persistence\nUnrelated notes mention spawn_blocking in passing.\n";
    fs::write(workspace.join("docs/phrase-persistence.md"), unrelated_doc).unwrap();

    let local_doc = "# task_spawn\nLocal task_spawn notes explain spawn_blocking.\n";
    fs::write(workspace.join("docs/task_spawn.md"), local_doc).unwrap();

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["src/".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let markdown_target = ResolvedTarget {
        name: "markdown".to_string(),
        language: Language::Markdown,
        directories: vec![PathBuf::from("docs")],
        include: vec!["**/*.md".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Docs,
    };
    let cfg = Config {
        root: workspace.clone(),
        database: workspace.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target, markdown_target],
        local_ai: Default::default(),
    };
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let mut matches = index.symbols("spawn_blocking", Some(Language::Rust), 10).unwrap();
    let symbol = matches.remove(0);
    let hits = index.docs_for_selected_symbol(&symbol, 10).unwrap();
    assert_eq!(hits[0].path, "src/runtime/task_spawn.rs", "docs hits: {hits:?}");

    let phrase_index = hits.iter().position(|hit| hit.path == "docs/phrase-persistence.md");
    let task_spawn_index = hits.iter().position(|hit| hit.path == "docs/task_spawn.md");
    // Holds when the unrelated document is absent, or when the local document precedes it.
    let local_doc_wins = match (phrase_index, task_spawn_index) {
        (None, _) => true,
        (Some(phrase), Some(local)) => local < phrase,
        (Some(_), None) => false,
    };
    assert!(
        local_doc_wins,
        "path-local task_spawn docs should outrank unrelated phrase docs: {hits:?}"
    );

    fs::remove_dir_all(workspace).unwrap();
}
5743
/// Indexes a Rust file that ends with a syntactically broken function (`fn broken( {`) and
/// checks that the well-formed part is still indexed: `caller` is found as a symbol and
/// its call to `helper` is recorded as a `Syntactic` `calls_name` edge.
#[test]
fn partial_tree_sitter_trees_still_contribute_valid_symbols_and_edges() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
pub fn helper() {}

pub fn caller() {
    helper();
}

fn broken( {
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let symbols = index.symbols("caller", Some(Language::Rust), 10).unwrap();
    let caller_indexed = symbols.iter().any(|symbol| symbol.name == "caller");
    assert!(caller_indexed, "caller symbols: {symbols:?}");
    assert_edge(&index, "caller", "helper", "calls_name", "Syntactic");

    fs::remove_dir_all(workspace).unwrap();
}
5774
/// Indexes a crate with a free function `spawn_blocking` and a method call
/// `joinset.spawn_blocking()`, then checks that the call edge from `caller` is stored
/// unresolved (`to_symbol_id` NULL, `NameOnly`) with qualified name
/// `joinset::spawn_blocking` and receiver hint `joinset`.
#[test]
fn receiver_method_calls_do_not_bind_to_same_named_free_functions() {
    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
pub fn spawn_blocking() {}

pub fn caller(joinset: JoinSet) {
    joinset.spawn_blocking();
}

pub struct JoinSet;
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let sql = "
                SELECT to_name, target_qualified_name, to_symbol_id, confidence, resolution, receiver_hint
                FROM edges
                WHERE from_name LIKE '%caller'
                  AND edge_kind = 'calls_name'
                  AND to_name = 'spawn_blocking'
                ";
    let (to_name, target_qualified_name, to_symbol_id, confidence, resolution, receiver_hint) =
        index
            .storage
            .connection()
            .query_row(sql, [], |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, Option<String>>(1)?,
                    row.get::<_, Option<i64>>(2)?,
                    row.get::<_, String>(3)?,
                    row.get::<_, String>(4)?,
                    row.get::<_, Option<String>>(5)?,
                ))
            })
            .unwrap();

    assert_eq!(to_name, "spawn_blocking");
    assert_eq!(target_qualified_name.as_deref(), Some("joinset::spawn_blocking"));
    assert_eq!(to_symbol_id, None);
    assert_eq!(confidence, "NameOnly");
    assert_eq!(resolution, "unresolved");
    assert_eq!(receiver_hint.as_deref(), Some("joinset"));

    fs::remove_dir_all(workspace).unwrap();
}
5829
/// Checks the default filtering of `trace_callees` for a generic function that calls a
/// local `helper` and an external `tokio::task::spawn_blocking`.
///
/// By default the verified `helper` call is returned while the external call,
/// `references_type` edges and the targets `F`, `T`, `Send`, `Result` and `JoinError` are
/// not. Type references appear once `include_references` is set, and the external call
/// appears once `include_unresolved` is set.
#[test]
fn trace_callees_excludes_type_references_by_default() {
    use crate::query::graph::GraphTraversalOptions;

    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
pub struct JoinError;
pub enum Result<T, E> { Ok(T), Err(E) }
pub fn helper() {}

pub fn spawn_blocking<F, T>(f: F) -> Result<T, JoinError>
where
    F: FnOnce() -> T + Send + 'static,
    T: Send + 'static,
{
    helper();
    tokio::task::spawn_blocking(f)
}
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    // Default options.
    let default_callees = index.trace_callees("spawn_blocking", 20).unwrap();
    let helper_verified = default_callees.iter().any(|candidate| {
        candidate.edge_kind == "calls_name"
            && candidate.target.as_deref() == Some("helper")
            && candidate.verified_target_symbol
    });
    assert!(helper_verified, "default callees: {default_callees:?}");

    let external_leaked = default_callees.iter().any(|candidate| {
        candidate.target_qualified_name.as_deref() == Some("tokio::task::spawn_blocking")
    });
    assert!(
        !external_leaked,
        "default callees leaked unresolved external call: {default_callees:?}"
    );

    let type_ref_leaked =
        default_callees.iter().any(|candidate| candidate.edge_kind == "references_type");
    assert!(!type_ref_leaked, "default callees leaked type refs: {default_callees:?}");

    let generic_target_leaked = default_callees.iter().any(|candidate| {
        matches!(
            candidate.target.as_deref(),
            Some("F" | "T" | "Send" | "Result" | "JoinError")
        )
    });
    assert!(
        !generic_target_leaked,
        "default callees leaked generic/type targets: {default_callees:?}"
    );

    // References enabled.
    let reference_options = GraphTraversalOptions {
        include_references: true,
        edge_kinds: None,
        ..Default::default()
    };
    let with_refs =
        index.trace_callees_with_options("spawn_blocking", 20, &reference_options).unwrap();
    let has_type_ref = with_refs.iter().any(|candidate| candidate.edge_kind == "references_type");
    assert!(has_type_ref, "reference-enabled callees: {with_refs:?}");

    // Unresolved targets enabled.
    let unresolved_options = GraphTraversalOptions {
        include_unresolved: true,
        ..Default::default()
    };
    let with_unresolved =
        index.trace_callees_with_options("spawn_blocking", 20, &unresolved_options).unwrap();
    let has_external = with_unresolved.iter().any(|candidate| {
        candidate.target_qualified_name.as_deref() == Some("tokio::task::spawn_blocking")
    });
    assert!(has_external, "unresolved-enabled callees: {with_unresolved:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5920
/// Checks that default `trace_callees` output for `caller` keeps the call to `repo_helper`
/// and contains no `uses_macro` edges and none of the targets `new`, `map_err`,
/// `unwrap_or_else`, `to_string` or `format`; and that enabling `include_unresolved`,
/// `include_macros` and `include_common_methods` brings back a macro edge and the
/// `unwrap_or_else` target.
#[test]
fn trace_callees_defaults_to_repo_relevant_calls() {
    use crate::query::graph::GraphTraversalOptions;

    let workspace = unique_temp_root();
    // Remove the directory if it is already there; the result is intentionally ignored.
    let _ = fs::remove_dir_all(&workspace);
    fs::create_dir_all(workspace.join("src")).unwrap();

    let lib_source = r#"
pub fn repo_helper() {}

pub fn caller(input: Result<String, String>) -> String {
    repo_helper();
    let values: Vec<String> = Vec::new();
    let _ = input.map_err(|error| error.to_string());
    let _ = Some("value").unwrap_or_else(|| "fallback");
    let _ = format!("hello");
    values.get(0).unwrap_or_else(|| "fallback").to_string()
}
"#;
    fs::write(workspace.join("src/lib.rs"), lib_source).unwrap();

    let cfg = source_config(workspace.clone(), Language::Rust);
    let index = IndexDatabase::rebuild(&cfg).unwrap();

    let default_callees = index.trace_callees("caller", 20).unwrap();
    let keeps_repo_helper =
        default_callees.iter().any(|candidate| candidate.target.as_deref() == Some("repo_helper"));
    assert!(
        keeps_repo_helper,
        "default callees should keep repo-local calls: {default_callees:?}"
    );

    let low_signal_leaked = default_callees.iter().any(|candidate| {
        candidate.edge_kind == "uses_macro"
            || matches!(
                candidate.target.as_deref(),
                Some("new" | "map_err" | "unwrap_or_else" | "to_string" | "format")
            )
    });
    assert!(
        !low_signal_leaked,
        "default callees leaked low-signal calls: {default_callees:?}"
    );

    let expanded_options = GraphTraversalOptions {
        include_unresolved: true,
        include_macros: true,
        include_common_methods: true,
        ..Default::default()
    };
    let expanded = index.trace_callees_with_options("caller", 20, &expanded_options).unwrap();

    let has_macro_edge = expanded.iter().any(|candidate| candidate.edge_kind == "uses_macro");
    assert!(has_macro_edge, "macro-enabled callees: {expanded:?}");

    let has_common_method =
        expanded.iter().any(|candidate| candidate.target.as_deref() == Some("unwrap_or_else"));
    assert!(has_common_method, "common-method-enabled callees: {expanded:?}");

    fs::remove_dir_all(workspace).unwrap();
}
5984
/// Indexes a small Kotlin file and checks the call, `implements`, and import
/// edges, plus a `direct_caller` entry in the impact surface of `helper`.
#[test]
fn indexes_kotlin_graph_edges_from_tree_sitter() {
    let kotlin_source = r#"
package dev.cq27.test

import dev.cq27.lib.ExternalThing

interface Syncable

class MainBridge : Syncable {
 suspend fun syncOnce() {
 helper()
 ExternalThing()
 }
}

fun helper() {}
"#;
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/Main.kt"), kotlin_source).unwrap();
    let db = IndexDatabase::rebuild(&source_config(root.clone(), Language::Kotlin)).unwrap();

    // Each row is (from, to, edge kind, confidence).
    let expected_edges = [
        ("syncOnce", "helper", "calls_name", "Syntactic"),
        ("MainBridge", "Syncable", "implements", "Syntactic"),
        ("src/Main.kt", "ExternalThing", "imports", "NameOnly"),
    ];
    for (from, to, kind, confidence) in expected_edges {
        assert_edge(&db, from, to, kind, confidence);
    }

    let impact = db.impact_surface("helper", 10).unwrap();
    let has_direct_caller = impact.iter().any(|item| {
        item.reason == "direct_caller" && item.category == "Direct structural impact"
    });
    assert!(has_direct_caller, "impact: {impact:?}");

    fs::remove_dir_all(root).unwrap();
}
6026
/// Indexes the `graph-realworld/kotlin` fixture and checks the expected
/// import, containment, call, and type-reference edges, then verifies the
/// caller lookup for `cleaned` reports `syncOnce`.
#[test]
fn indexes_real_world_kotlin_graph_patterns() {
    let root = fixture_temp_root("graph-realworld/kotlin");
    let db = IndexDatabase::rebuild(&source_config(root.clone(), Language::Kotlin)).unwrap();

    // Each row is (from, to, edge kind, confidence), checked in this order.
    let expected_edges = [
        ("src/Main.kt", "ExternalFactory", "imports", "NameOnly"),
        ("Worker", "companion", "contains", "Exact"),
        ("companion", "create", "contains", "Exact"),
        ("syncOnce", "create", "calls_name", "Syntactic"),
        ("syncOnce", "Worker", "references_type", "Syntactic"),
        ("syncOnce", "run", "calls_name", "Syntactic"),
        ("syncOnce", "SingletonRunner", "references_type", "Syntactic"),
        ("syncOnce", "ExternalFactory", "calls_name", "NameOnly"),
        ("syncOnce", "ExternalFactory", "references_type", "NameOnly"),
        ("syncOnce", "cleaned", "calls_name", "Syntactic"),
    ];
    for (from, to, kind, confidence) in expected_edges {
        assert_edge(&db, from, to, kind, confidence);
    }

    let callers = db.find_callers("cleaned", 10).unwrap();
    let sync_once_calls_cleaned = callers.iter().any(|edge| {
        let from_sync_once =
            edge.from_symbol.as_deref().is_some_and(|name| name.ends_with("syncOnce"));
        edge.edge_kind == "calls_name" && edge.edge_confidence == edge.confidence && from_sync_once
    });
    assert!(sync_once_calls_cleaned, "cleaned callers: {callers:?}");

    fs::remove_dir_all(root).unwrap();
}
6055
/// Two Kotlin types both define `build()`. An exact caller lookup pinned to
/// `WatchProposalBuilder.build` must report `actualCaller` exactly once and
/// must not report `unrelatedBuilderCalls`.
#[test]
fn kotlin_caller_lookup_respects_qualified_receivers_for_common_method_names() {
    use crate::query::graph::{GraphResolutionMode, GraphTraversalOptions};

    let kotlin_source = r#"
package dev.cq27.test

object WatchProposalBuilder {
 fun build(): String = "proposal"
}

class AndroidDialogBuilder {
 fun build(): String = "dialog"
}

fun actualCaller() {
 WatchProposalBuilder.build()
}

fun unrelatedBuilderCalls(dialog: AndroidDialogBuilder) {
 dialog.build()
 AndroidDialogBuilder().build()
}
"#;
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/Main.kt"), kotlin_source).unwrap();
    let db = IndexDatabase::rebuild(&source_config(root.clone(), Language::Kotlin)).unwrap();

    // Pick the `build` symbol that belongs to WatchProposalBuilder.
    let build_symbols = db.symbols("build", Some(Language::Kotlin), 10).unwrap();
    let target = build_symbols
        .into_iter()
        .find(|symbol| symbol.qualified_name.contains("WatchProposalBuilder"))
        .expect("WatchProposalBuilder.build symbol");

    let exact_lookup = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(target.symbol_id),
        ..Default::default()
    };
    let callers = db.find_callers_with_options("build", 20, &exact_lookup).unwrap();

    // Counts caller edges whose originating symbol name ends with `suffix`.
    let edges_from = |suffix: &str| {
        callers
            .iter()
            .filter(|edge| edge.from_symbol.as_deref().is_some_and(|name| name.ends_with(suffix)))
            .count()
    };
    assert_eq!(
        edges_from("actualCaller"),
        1,
        "actual caller should be present once: {callers:?}"
    );
    assert!(
        edges_from("unrelatedBuilderCalls") == 0,
        "unrelated builder calls should not resolve to WatchProposalBuilder.build: {callers:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
6125
/// Runs an offline sync (refs discovered, nothing synced) followed by a sync
/// against the mock client, then checks that issue search, path refs,
/// rationale search, and chunk papertrail all answer from the synced data.
#[test]
fn github_sync_caches_papertrail_and_rationale_without_query_time_crawling() {
    let (root, config) =
        markdown_config("# Decision\nRefs cq27-dev/rag-rat#42\nwe will keep sqlite\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Offline pass: no client is supplied and the offline flag is set.
    let offline =
        github::sync_from_refs::<MockGitHubClient>(db.storage.connection(), &root, None, true)
            .unwrap();
    assert!(offline.offline);
    assert_eq!(offline.discovered_refs, 1);
    assert_eq!(offline.synced_items, 0);

    // Online pass against the mock client.
    let mock = MockGitHubClient;
    let report =
        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
    assert!(!report.offline);
    assert_eq!(report.discovered_refs, 1);
    assert_eq!(report.synced_items, 5);
    let synced = &report.status;
    assert_eq!(synced.issues, 1);
    assert_eq!(synced.comments, 1);
    assert_eq!(synced.pulls, 1);
    assert_eq!(synced.reviews, 1);
    assert_eq!(synced.review_comments, 1);

    let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
    assert_eq!(issue_hits.len(), 1);
    let issue_hit = &issue_hits[0];
    assert_eq!(issue_hit.classification, "decision");
    assert_eq!(issue_hit.evidence_kind, "historical_github");

    let refs = db.github_refs_for_path("docs/search.md", 10).unwrap();
    assert_eq!(refs.len(), 1);
    assert_eq!(refs[0].source_kind, "file");

    let rationale = db.rationale_search("risk", 10).unwrap();
    assert!(rationale.iter().any(|item| item.classification == "risk"));

    // A literal issue reference in the query should rank issue 42 first.
    let issue_ref_rationale = db.rationale_search("Fixes #42", 10).unwrap();
    let top = issue_ref_rationale.first();
    assert_eq!(top.map(|item| item.number), Some(42));
    assert_eq!(top.map(|item| item.evidence_kind), Some("literal_github_ref"));
    assert_eq!(top.map(|item| item.score), Some(1.0));
    let mentions_issue_42 = issue_ref_rationale.iter().any(|item| item.number == 42);
    assert!(
        mentions_issue_42,
        "issue ref rationale should use structured GitHub refs: {issue_ref_rationale:?}"
    );

    let papertrail = db.papertrail_for_chunk(first_chunk_id(&db), 10).unwrap().unwrap();
    assert!(papertrail.current_source.is_some());
    let evidence = &papertrail.github_evidence;
    assert!(!evidence.is_empty());
    let only_known_kinds = evidence
        .iter()
        .all(|item| matches!(item.evidence_kind, "historical_github" | "literal_github_ref"));
    assert!(only_known_kinds);

    fs::remove_dir_all(root).unwrap();
}
6184
/// Commits a file with `Fixes #42` in the commit message, syncs against the
/// mock client, and checks that the commit papertrail leads with issue 42 as
/// a literal ref and has no fallback evidence.
#[test]
fn papertrail_for_commit_prefers_commit_sourced_github_refs() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("docs")).unwrap();

    // One-commit repository whose message body carries the issue reference.
    run_git(&root, &["init"]);
    run_git(&root, &["config", "user.name", "Rag Rat"]);
    run_git(&root, &["config", "user.email", "rag@example.com"]);
    fs::write(root.join("docs/search.md"), "# Decision\nalpha\n").unwrap();
    run_git(&root, &["add", "."]);
    run_git(&root, &["commit", "-m", "Fix search rationale", "-m", "Fixes #42"]);

    let db = IndexDatabase::rebuild(&markdown_config_for_root(root.clone())).unwrap();
    let commit: String = db
        .storage
        .connection()
        .query_row("SELECT hash FROM git_commits LIMIT 1", [], |row| row.get(0))
        .unwrap();
    let mock = MockGitHubClient;
    github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();

    // Look the commit up by its first seven characters.
    let short_hash = &commit[..7];
    let papertrail = db.papertrail_for_commit(short_hash, 10).unwrap();
    let lead = papertrail.github_evidence.first();
    assert_eq!(lead.map(|item| item.number), Some(42));
    assert_eq!(lead.map(|item| item.evidence_kind), Some("literal_github_ref"));
    let no_fallback = papertrail.fallback_github_evidence.is_empty();
    assert!(
        no_fallback,
        "structured commit refs should suppress noisy fallback evidence: {papertrail:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
6220
/// A file that mentions `#42` in two comments must yield exactly one issue
/// evidence row for #42 in the symbol papertrail.
#[test]
fn papertrail_for_symbol_dedupes_duplicate_file_refs() {
    let source_with_two_refs =
        "// First rationale (#42)\n// Second rationale (#42)\npub fn tracked_symbol() {}\n";
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), source_with_two_refs).unwrap();

    let db = IndexDatabase::rebuild(&source_config(root.clone(), Language::Rust)).unwrap();
    let mock = MockGitHubClient;
    github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();

    let lookup = db.papertrail_for_symbol("tracked_symbol", Some(Language::Rust), 10).unwrap();
    let papertrail = lookup.expect("tracked symbol papertrail");

    let issue_42_rows = papertrail
        .github_evidence
        .iter()
        .filter(|item| item.item_kind == "issue" && item.number == 42)
        .count();
    assert_eq!(
        issue_42_rows,
        1,
        "duplicate #42 refs in one file should collapse to one issue evidence row: {papertrail:?}"
    );

    fs::remove_dir_all(root).unwrap();
}
6252
/// With one ref that syncs and one that fails as `not_found`, the first sync
/// must keep the successful items and report the failure; a second sync must
/// skip both refs without syncing or failing anything.
#[test]
fn github_sync_keeps_partial_cache_and_skips_synced_refs_after_404() {
    let markdown =
        "# Decision\nRefs cq27-dev/rag-rat#42 and cq27-dev/rag-rat#404\nwe will keep sqlite\n";
    let (root, config) = markdown_config(markdown);
    let db = IndexDatabase::rebuild(&config).unwrap();
    let mock = PartiallyFailingGitHubClient;

    // First pass: #42 syncs, #404 is reported as a failure.
    let report =
        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
    assert_eq!(report.discovered_refs, 2);
    assert_eq!(report.synced_items, 5);
    assert_eq!(report.failed_refs, 1);
    assert_eq!(report.errors.len(), 1);
    let failure = &report.errors[0];
    assert_eq!(failure.number, 404);
    assert_eq!(failure.status, "not_found");

    let issue_hits = db.github_issue_search("sqlite", 10).unwrap();
    assert_eq!(issue_hits.len(), 1);
    assert_eq!(issue_hits[0].number, 42);

    // Second pass: both refs are skipped.
    let second =
        github::sync_from_refs(db.storage.connection(), &root, Some(&mock), false).unwrap();
    assert_eq!(second.synced_items, 0);
    assert_eq!(second.skipped_refs, 2);
    assert_eq!(second.failed_refs, 0);

    fs::remove_dir_all(root).unwrap();
}
6282
/// After `mark_fts_dirty`, status reports dirty/not fresh; a search still
/// returns the hit, and status afterwards reports clean and fresh.
#[test]
fn search_recovers_when_fts_is_marked_dirty() {
    let (root, config) = markdown_config("alpha token");
    let db = IndexDatabase::rebuild(&config).unwrap();

    db.mark_fts_dirty().unwrap();
    let dirty = db.status(&config.database).unwrap();
    assert!(dirty.fts_dirty);
    assert!(!dirty.fts_fresh);

    let hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(hits.len(), 1);
    let only_hit = &hits[0];
    assert_eq!(only_hit.summary, "alpha token");

    let fresh = db.status(&config.database).unwrap();
    assert!(!fresh.fts_dirty);
    assert!(fresh.fts_fresh);

    fs::remove_dir_all(root).unwrap();
}
6302
/// Inserting one line above the indexed text shifts the chunk down by one;
/// `read_chunk` must report the shifted span with the original text.
#[test]
fn read_chunk_relocates_small_line_drift_to_current_text() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();
    let chunk_id = first_chunk_id(&db);

    // Prepend a single line after indexing.
    let drifted = "inserted\n# Title\nalpha token\n";
    fs::write(root.join("docs/search.md"), drifted).unwrap();

    let chunk = db.read_chunk(chunk_id).unwrap().unwrap();
    assert_eq!(chunk.start_line, 2);
    assert_eq!(chunk.end_line, 3);
    assert_eq!(chunk.text, "# Title\nalpha token\n");

    fs::remove_dir_all(root).unwrap();
}
6317
/// Replacing the whole file makes `read_chunk` fail with a `StaleChunk`
/// error; afterwards search finds the new text and no longer the old.
#[test]
fn read_chunk_large_drift_reindexes_and_reports_stale_chunk() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();
    let chunk_id = first_chunk_id(&db);

    // Overwrite the indexed file with unrelated content.
    let replacement = "# Replacement\nbeta token\n";
    fs::write(root.join("docs/search.md"), replacement).unwrap();

    let err = db.read_chunk(chunk_id).unwrap_err().to_string();
    assert!(err.contains("StaleChunk"), "{err}");

    let beta_hits = db.search("beta", 10, false).unwrap();
    assert_eq!(beta_hits.len(), 1);
    let alpha_hits = db.search("alpha", 10, false).unwrap();
    assert!(alpha_hits.is_empty());

    fs::remove_dir_all(root).unwrap();
}
6333
/// After the indexed token is replaced on disk, searching for the old token
/// returns nothing and searching for the new token returns one hit.
#[test]
fn search_retries_after_healing_stale_hit() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Swap "alpha" for "beta" after indexing.
    fs::write(root.join("docs/search.md"), "# Title\nbeta token\n").unwrap();

    let stale_hits = db.search("alpha", 10, false).unwrap();
    assert!(stale_hits.is_empty());

    let beta_hits = db.search("beta", 10, false).unwrap();
    assert_eq!(beta_hits.len(), 1);
    let healed = &beta_hits[0];
    assert!(healed.summary.contains("beta"));

    fs::remove_dir_all(root).unwrap();
}
6348
/// When a line is inserted above the indexed text, the search hit must
/// already carry the shifted line span (2..=3).
#[test]
fn search_heals_relocated_hits_before_returning_line_spans() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();

    // Prepend a single line after indexing.
    let drifted = "inserted\n# Title\nalpha token\n";
    fs::write(root.join("docs/search.md"), drifted).unwrap();

    let hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(hits.len(), 1);
    let relocated = &hits[0];
    assert_eq!(relocated.start_line, 2);
    assert_eq!(relocated.end_line, 3);
    assert!(relocated.summary.contains("alpha"));

    fs::remove_dir_all(root).unwrap();
}
6363
/// Deleting the indexed file makes `read_chunk` fail with a `Gone` error and
/// leaves search for its text empty.
#[test]
fn read_chunk_deleted_source_reports_gone() {
    let (root, config) = markdown_config("# Title\nalpha token\n");
    let db = IndexDatabase::rebuild(&config).unwrap();
    let chunk_id = first_chunk_id(&db);

    // Remove the source file out from under the index.
    let indexed_file = root.join("docs/search.md");
    fs::remove_file(indexed_file).unwrap();

    let err = db.read_chunk(chunk_id).unwrap_err().to_string();
    assert!(err.contains("Gone"), "{err}");
    let remaining_hits = db.search("alpha", 10, false).unwrap();
    assert!(remaining_hits.is_empty());

    fs::remove_dir_all(root).unwrap();
}
6377
/// Indexes `MAX_AUTO_HEAL_FILES_PER_CALL + 1` files, rewrites all of them,
/// and expects a search for the old text to fail with `needs_reindex`.
#[test]
fn search_returns_needs_reindex_when_heal_cap_is_exceeded() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let docs = root.join("docs");
    fs::create_dir_all(&docs).unwrap();

    // Writes the same contents to every doc file; the inclusive range yields
    // one file more than the cap.
    let write_all_docs = |contents: &str| {
        for index in 0..=MAX_AUTO_HEAL_FILES_PER_CALL {
            fs::write(docs.join(format!("doc-{index}.md")), contents).unwrap();
        }
    };

    write_all_docs("common stale token\n");
    let db = IndexDatabase::rebuild(&markdown_config_for_root(root.clone())).unwrap();
    write_all_docs("fresh replacement token\n");

    let err = db.search("common", 20, false).unwrap_err().to_string();
    assert!(err.contains("needs_reindex"), "{err}");

    fs::remove_dir_all(root).unwrap();
}
6398
/// With two unchanged files and a heal limit of one, `heal_index` must heal
/// and remove nothing, count both files as skipped, and carry no message.
#[test]
fn heal_index_limit_does_not_warn_when_only_fresh_files_are_skipped() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let docs = root.join("docs");
    fs::create_dir_all(&docs).unwrap();
    for (file_name, contents) in [("one.md", "one fresh token\n"), ("two.md", "two fresh token\n")]
    {
        fs::write(docs.join(file_name), contents).unwrap();
    }
    let db = IndexDatabase::rebuild(&markdown_config_for_root(root.clone())).unwrap();

    // Nothing changed on disk since the rebuild.
    let report = db.heal_index(Some(1)).unwrap();

    assert_eq!(report.healed_files, 0);
    assert_eq!(report.removed_files, 0);
    assert_eq!(report.skipped_files, 2);
    assert_eq!(report.message, None);

    fs::remove_dir_all(root).unwrap();
}
6419
/// Overwriting the `fts_source_revision` meta value makes status report
/// not-fresh (but not dirty); after a search, the FTS revision matches the
/// content revision again.
#[test]
fn search_recovers_when_fts_revision_is_stale() {
    let (root, config) = markdown_config("alpha token");
    let db = IndexDatabase::rebuild(&config).unwrap();

    db.set_meta("fts_source_revision", "stale").unwrap();
    let stale = db.status(&config.database).unwrap();
    assert!(!stale.fts_dirty);
    assert!(!stale.fts_fresh);

    let hits = db.search("alpha", 10, false).unwrap();
    assert_eq!(hits.len(), 1);

    let fresh = db.status(&config.database).unwrap();
    let recorded_revision = fresh.fts_source_revision.as_deref();
    assert_eq!(recorded_revision, Some(fresh.content_revision.as_str()));
    assert!(fresh.fts_fresh);

    fs::remove_dir_all(root).unwrap();
}
6438
/// Indexes a syntactically broken Rust file and checks that status counts
/// one parser failure and names `src/broken.rs`.
#[test]
fn parser_failures_report_paths() {
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    let src = root.join("src");
    fs::create_dir_all(&src).unwrap();
    // Unterminated function signature.
    fs::write(src.join("broken.rs"), "pub fn broken(").unwrap();

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.rs".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target],
        local_ai: Default::default(),
    };

    let db = IndexDatabase::rebuild(&config).unwrap();
    let status = db.status(&config.database).unwrap();
    assert_eq!(status.parser_failures, 1);
    let first_failure = &status.parser_failure_paths[0];
    assert_eq!(first_failure.path, "src/broken.rs");

    fs::remove_dir_all(root).unwrap();
}
6467
/// Creates a memory bound to the logical symbol of a function with two
/// `cfg` variants, then checks it is returned for the symbol, attached to the
/// bound chunk, and counted as a direct, active memory in the impact report.
#[test]
fn repo_memory_bound_to_logical_symbol_surfaces_in_symbol_chunk_and_impact() {
    use crate::query::{
        impact::ImpactSurfaceOptions,
        memory::{RepoMemoryBindTarget, RepoMemoryCreate},
        symbol::SymbolSelector,
    };

    let two_cfg_variants =
        "#[cfg(unix)]\npub fn cfg_helper() {}\n#[cfg(windows)]\npub fn cfg_helper() {}\n";
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), two_cfg_variants).unwrap();
    let db = IndexDatabase::rebuild(&source_config(root.clone(), Language::Rust)).unwrap();

    // Select by name; ambiguity is allowed because both variants share it.
    let selector = SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("cfg_helper".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: true,
        limit: 10,
    };
    let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("selected symbol");
    let logical_symbol_id = symbol.logical_symbol_id.expect("logical symbol id");

    // Bind only to the logical symbol; every other anchor stays unset.
    let bind = RepoMemoryBindTarget {
        logical_symbol_id: Some(logical_symbol_id),
        symbol_id: None,
        chunk_id: None,
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let request = RepoMemoryCreate {
        kind: "Invariant".to_string(),
        title: "Treat cfg helper variants as one logical helper".to_string(),
        body: "Caller and impact analysis should use the logical symbol, not one cfg body variant."
            .to_string(),
        confidence: "high".to_string(),
        created_by: Some("test-agent".to_string()),
        source: Some("agent".to_string()),
        tags: vec!["cfg".to_string(), "graph".to_string()],
        bind,
    };
    let created = db.memory_create(request).unwrap();
    assert!(!created.duplicate);
    assert_eq!(created.memory.bindings[0].binding_kind, "logical_symbol");

    // The memory is returned for the symbol and attached to its bound chunk.
    let memories = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(memories.len(), 1);
    let memory = &memories[0];
    assert_eq!(memory.kind, "Invariant");
    let chunk_id = memory.bindings[0].chunk_id.expect("bound chunk");
    let chunk = db.read_chunk(chunk_id).unwrap().expect("memory chunk");
    assert_eq!(chunk.memories.len(), 1);
    assert_eq!(chunk.memories[0].memory_id, created.memory.memory_id);

    let impact_options = ImpactSurfaceOptions::default();
    let impact =
        db.impact_surface_report_for_selected_symbol(&symbol, 10, &impact_options).unwrap();
    assert_eq!(impact.repo_memories.direct.len(), 1);
    let memory_status = &impact.completeness_and_caveats.memory_status;
    assert_eq!(memory_status.active, 1);
    assert_eq!(memory_status.stale, 0);

    fs::remove_dir_all(root).unwrap();
}
6548
/// Binds a memory to a chunk, then changes the chunk's `text_hash` and later
/// deletes the chunk row; `memory_validate` must report the binding as
/// `stale` and then as `gone`.
#[test]
fn repo_memory_validate_marks_changed_or_missing_anchors_non_current() {
    use crate::query::{
        memory::{RepoMemoryBindTarget, RepoMemoryCreate},
        symbol::SymbolSelector,
    };

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), "pub fn anchored_memory() {}\n").unwrap();
    let db = IndexDatabase::rebuild(&source_config(root.clone(), Language::Rust)).unwrap();

    let selector = SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("anchored_memory".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: false,
        limit: 10,
    };
    let symbol = db.select_symbol(&selector).unwrap().unwrap().expect("selected symbol");

    // Find the chunk row that carries this symbol in its file.
    let chunk_lookup_sql = "
        SELECT chunks.id
        FROM chunks
        JOIN files ON files.id = chunks.file_id
        WHERE files.path = ?1 AND chunks.symbol_path = ?2
        LIMIT 1
        ";
    let chunk_id: i64 = db
        .storage
        .connection()
        .query_row(chunk_lookup_sql, params![symbol.path, symbol.qualified_name], |row| {
            row.get(0)
        })
        .unwrap();

    // Bind only to the chunk; every other anchor stays unset.
    let bind = RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: Some(chunk_id),
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let request = RepoMemoryCreate {
        kind: "Risk".to_string(),
        title: "Anchor must become stale when source hash changes".to_string(),
        body: "Validation should separate stale memories from current repo evidence."
            .to_string(),
        confidence: "medium".to_string(),
        created_by: Some("test-agent".to_string()),
        source: Some("agent".to_string()),
        tags: Vec::new(),
        bind,
    };
    let created = db.memory_create(request).unwrap();

    // Step 1: change the stored hash, so the binding should be stale.
    db.storage
        .connection()
        .execute("UPDATE chunks SET text_hash = 'changed' WHERE id = ?1", [chunk_id])
        .unwrap();
    let after_hash_change = db.memory_validate().unwrap();
    assert_eq!(after_hash_change.stale, 1);
    let stale = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(stale[0].memory_id, created.memory.memory_id);
    assert_eq!(stale[0].bindings[0].anchor_status, "stale");

    // Step 2: delete the chunk row, so the binding should be gone.
    db.storage.connection().execute("DELETE FROM chunks WHERE id = ?1", [chunk_id]).unwrap();
    let after_delete = db.memory_validate().unwrap();
    assert_eq!(after_delete.gone, 1);
    let gone = db.memory_for_symbol(&symbol, 10).unwrap();
    assert_eq!(gone[0].bindings[0].anchor_status, "gone");

    fs::remove_dir_all(root).unwrap();
}
6633
/// Binds one memory to the caller edge into `target_edge` and checks it shows
/// up as `path_crossed` (not `direct`) in the impact report; then binds a
/// second memory to a call path and looks it up by its edge sequence hash.
#[test]
fn repo_memory_bound_to_edge_surfaces_when_impact_crosses_call_path() {
    use crate::query::{
        graph::{GraphResolutionMode, GraphTraversalOptions},
        impact::ImpactSurfaceOptions,
        memory::{RepoMemoryBindTarget, RepoMemoryCreate},
        symbol::SymbolSelector,
    };

    let caller_and_target = "pub fn target_edge() {}\npub fn caller_edge() {\n target_edge();\n}\n";
    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    fs::write(root.join("src/lib.rs"), caller_and_target).unwrap();
    let db = IndexDatabase::rebuild(&source_config(root.clone(), Language::Rust)).unwrap();

    let selector = SymbolSelector {
        logical_symbol_id: None,
        symbol_id: None,
        symbol_path: None,
        symbol: Some("target_edge".to_string()),
        language: Some(Language::Rust),
        allow_ambiguous: false,
        limit: 10,
    };
    let target = db.select_symbol(&selector).unwrap().unwrap().expect("selected target");

    // Take the id of the first edge reported by the exact caller traversal.
    let graph_options = GraphTraversalOptions {
        resolution_mode: GraphResolutionMode::Exact,
        symbol_id: Some(target.symbol_id),
        logical_symbol_id: target.logical_symbol_id,
        ..Default::default()
    };
    let callers =
        db.graph_traversal_report("find_callers", &target, true, 10, &graph_options).unwrap();
    let edge_id = callers.results[0].edge_id;

    // Memory #1: bound only to that edge.
    let edge_bind = RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: None,
        edge_id: Some(edge_id),
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: None,
        end_logical_symbol_id: None,
        edge_sequence_hash: None,
        path_summary: None,
    };
    let edge_request = RepoMemoryCreate {
        kind: "Risk".to_string(),
        title: "caller_edge to target_edge must stay synchronous".to_string(),
        body: "This specific call path is used to prove edge-bound memories surface when impact crosses the edge."
            .to_string(),
        confidence: "high".to_string(),
        created_by: Some("test-agent".to_string()),
        source: Some("agent".to_string()),
        tags: vec!["edge".to_string()],
        bind: edge_bind,
    };
    let edge_memory = db.memory_create(edge_request).unwrap();
    let edge_binding = &edge_memory.memory.bindings[0];
    assert_eq!(edge_binding.binding_kind, "edge");
    assert_eq!(edge_binding.edge_id, Some(edge_id));

    let impact_options =
        ImpactSurfaceOptions { resolution_mode: GraphResolutionMode::Exact, ..Default::default() };
    let impact =
        db.impact_surface_report_for_selected_symbol(&target, 10, &impact_options).unwrap();
    let surfaced = &impact.repo_memories;
    assert!(surfaced.direct.is_empty());
    assert_eq!(surfaced.path_crossed.len(), 1);
    assert_eq!(surfaced.path_crossed[0].memory_id, edge_memory.memory.memory_id);
    assert_eq!(impact.completeness_and_caveats.memory_status.active, 1);

    // Memory #2: bound to a call path identified by an edge sequence hash.
    let call_path_bind = RepoMemoryBindTarget {
        logical_symbol_id: None,
        symbol_id: None,
        chunk_id: None,
        edge_id: None,
        path: None,
        start_line: None,
        end_line: None,
        commit_hash: None,
        github_owner: None,
        github_repo: None,
        github_number: None,
        start_logical_symbol_id: target.logical_symbol_id,
        end_logical_symbol_id: target.logical_symbol_id,
        edge_sequence_hash: Some("edge-sequence-test-hash".to_string()),
        path_summary: Some("caller_edge -> target_edge".to_string()),
    };
    let call_path_request = RepoMemoryCreate {
        kind: "TestExpectation".to_string(),
        title: "caller_edge path hash recall".to_string(),
        body: "Call-path memories are addressable by a deterministic edge sequence hash."
            .to_string(),
        confidence: "medium".to_string(),
        created_by: Some("test-agent".to_string()),
        source: Some("agent".to_string()),
        tags: vec!["call-path".to_string()],
        bind: call_path_bind,
    };
    let call_path_memory = db.memory_create(call_path_request).unwrap();

    let call_path = db.memory_for_call_path_hash("edge-sequence-test-hash", 10).unwrap();
    assert_eq!(call_path.len(), 1);
    let recalled = &call_path[0];
    assert_eq!(recalled.memory_id, call_path_memory.memory.memory_id);
    assert_eq!(recalled.call_paths[0].path_summary, "caller_edge -> target_edge");

    fs::remove_dir_all(root).unwrap();
}
6752
/// Builds a git history in which `src/hot.rs` is committed four times and
/// `src/stable.rs` once, then checks that both the churn and god-module
/// briefs rank `src/hot.rs` first.
#[test]
fn repo_brief_ranks_churn_and_god_module_candidates() {
    use crate::query::repo_brief::{RepoBriefMode, RepoBriefOptions};

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src")).unwrap();
    run_git(&root, &["init"]);
    run_git(&root, &["config", "user.name", "Rag Rat"]);
    run_git(&root, &["config", "user.email", "rag@example.com"]);

    // Initial commit holds both modules.
    let hot_path = root.join("src/hot.rs");
    fs::write(root.join("src/stable.rs"), "pub fn stable() -> i32 { 1 }\n").unwrap();
    fs::write(&hot_path, hot_module_text(0)).unwrap();
    run_git(&root, &["add", "."]);
    run_git(&root, &["commit", "-m", "Add initial modules"]);

    // Three further commits touch only the hot module.
    for revision in 1..=3 {
        fs::write(&hot_path, hot_module_text(revision)).unwrap();
        run_git(&root, &["add", "src/hot.rs"]);
        run_git(&root, &["commit", "-m", "Iterate hot module"]);
    }

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.rs".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target],
        local_ai: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    let churn_options = RepoBriefOptions {
        mode: RepoBriefMode::Churn,
        limit: 1,
        include_generated: false,
        include_memories: true,
    };
    let churn = db.repo_brief(churn_options).unwrap();
    let hottest = &churn.candidates[0];
    assert_eq!(hottest.path, "src/hot.rs");
    assert_eq!(hottest.category, "recent_churn_hotspot");
    assert!(hottest.score <= 1.0);
    assert!(hottest.metrics.commit_touch_count >= 4);
    assert!(hottest.why.iter().any(|reason| reason.contains("churn")));

    let god_module_options = RepoBriefOptions {
        mode: RepoBriefMode::GodModules,
        limit: 1,
        include_generated: false,
        include_memories: true,
    };
    let god_modules = db.repo_brief(god_module_options).unwrap();
    let largest = &god_modules.candidates[0];
    assert_eq!(largest.path, "src/hot.rs");
    assert!(largest.score <= 1.0);
    assert!(largest.metrics.symbol_count >= 30);
    assert!(!largest.split_hints.is_empty());
    let suggests_impact_surface =
        largest.next_tools.iter().any(|tool| tool.tool == "impact_surface");
    assert!(suggests_impact_surface);

    fs::remove_dir_all(root).unwrap();
}
6820
/// Commits the two `src/sync` files together twice more after the initial
/// commit and checks that a `src/sync` cluster lists both files and reports
/// at least two co-touch edges.
#[test]
fn repo_clusters_groups_cotouched_files() {
    use crate::query::clusters::RepoClustersOptions;

    let root = unique_temp_root();
    let _ = fs::remove_dir_all(&root);
    fs::create_dir_all(root.join("src/sync")).unwrap();
    fs::create_dir_all(root.join("src/ui")).unwrap();
    run_git(&root, &["init"]);
    run_git(&root, &["config", "user.name", "Rag Rat"]);
    run_git(&root, &["config", "user.email", "rag@example.com"]);

    // Initial commit holds all three modules.
    let actor_path = root.join("src/sync/actor.rs");
    let msg_path = root.join("src/sync/msg.rs");
    fs::write(&actor_path, "pub fn sync_actor() -> i32 { 1 }\n").unwrap();
    fs::write(&msg_path, "pub fn sync_msg() -> i32 { 2 }\n").unwrap();
    fs::write(root.join("src/ui/app.rs"), "pub fn ui_app() -> i32 { 3 }\n").unwrap();
    run_git(&root, &["add", "."]);
    run_git(&root, &["commit", "-m", "Add modules"]);

    // Two more commits touch only the sync pair.
    for revision in 1..=2 {
        let actor_body = format!("pub fn sync_actor() -> i32 {{ {revision} }}\n");
        let msg_body = format!("pub fn sync_msg() -> i32 {{ {} }}\n", revision + 10);
        fs::write(&actor_path, actor_body).unwrap();
        fs::write(&msg_path, msg_body).unwrap();
        run_git(&root, &["add", "src/sync/actor.rs", "src/sync/msg.rs"]);
        run_git(&root, &["commit", "-m", "Iterate sync modules"]);
    }

    let rust_target = ResolvedTarget {
        name: "rust".to_string(),
        language: Language::Rust,
        directories: vec![PathBuf::from("src")],
        include: vec!["**/*.rs".to_string()],
        exclude: Vec::new(),
        kind: TargetKind::Source,
    };
    let config = Config {
        root: root.clone(),
        database: root.join(".rag-rat/index.sqlite"),
        targets: vec![rust_target],
        local_ai: Default::default(),
    };
    let db = IndexDatabase::rebuild(&config).unwrap();

    let cluster_options = RepoClustersOptions {
        limit: 5,
        include_generated: false,
        include_memories: true,
        min_cluster_size: 2,
    };
    let clusters = db.repo_clusters(cluster_options).unwrap();

    let sync_cluster = clusters
        .clusters
        .iter()
        .find(|cluster| cluster.name == "src/sync")
        .expect("sync cluster");
    for expected_path in ["src/sync/actor.rs", "src/sync/msg.rs"] {
        assert!(sync_cluster.representative_paths.contains(&expected_path.to_string()));
    }
    assert!(sync_cluster.metrics.co_touch_edges >= 2);

    fs::remove_dir_all(root).unwrap();
}
6887
/// Generates Rust source for a deliberately large module.
///
/// The text holds an `entry` function that sums calls to `helper_0` through `helper_31` plus
/// the literal `revision`, followed by the 32 one-line helper definitions. Changing
/// `revision` changes only the literal inside `entry`.
fn hot_module_text(revision: usize) -> String {
    // One `helper_N() +` operand line per helper, in ascending order.
    let call_lines: String = (0..32).map(|index| format!(" helper_{index}() +\n")).collect();
    // Matching definitions; each helper returns its own index.
    let helper_defs: String = (0..32)
        .map(|index| format!("pub fn helper_{index}() -> i32 {{ {index} }}\n"))
        .collect();
    format!("pub fn entry() -> i32 {{\n{call_lines} {revision}\n}}\n{helper_defs}")
}
6900
6901 fn unique_temp_root() -> PathBuf {
6902 let mut root = std::env::temp_dir();
6903 let suffix = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
6904 root.push(format!("rag-rat-schema-test-{}-{}-{suffix}", std::process::id(), now_ms()));
6905 root
6906 }
6907
6908 fn fixture_temp_root(fixture: &str) -> PathBuf {
6909 let root = unique_temp_root();
6910 let _ = fs::remove_dir_all(&root);
6911 let fixture_root =
6912 PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/fixtures").join(fixture);
6913 copy_fixture_dir(&fixture_root, &root);
6914 root
6915 }
6916
/// Recursively copies the directory tree rooted at `from` into `to`.
///
/// `to` is created (with any missing parents) before its entries are copied. Every I/O
/// failure panics, which keeps this test helper short.
fn copy_fixture_dir(from: &Path, to: &Path) {
    fs::create_dir_all(to).unwrap();
    let entries = fs::read_dir(from).unwrap().map(Result::unwrap);
    for item in entries {
        let source = item.path();
        let target = to.join(item.file_name());
        if !source.is_dir() {
            fs::copy(&source, &target).unwrap();
            continue;
        }
        // Directories are mirrored by recursing; the callee creates `target` itself.
        copy_fixture_dir(&source, &target);
    }
}
6930
6931 fn markdown_config(text: &str) -> (PathBuf, Config) {
6932 let root = unique_temp_root();
6933 let _ = fs::remove_dir_all(&root);
6934 let docs = root.join("docs");
6935 fs::create_dir_all(&docs).unwrap();
6936 fs::write(docs.join("search.md"), text).unwrap();
6937 let config = markdown_config_for_root(root.clone());
6938 (root, config)
6939 }
6940
6941 fn markdown_config_for_root(root: PathBuf) -> Config {
6942 Config {
6943 root: root.clone(),
6944 database: root.join(".rag-rat/index.sqlite"),
6945 targets: vec![ResolvedTarget {
6946 name: "markdown".to_string(),
6947 language: Language::Markdown,
6948 directories: vec![PathBuf::from("docs")],
6949 include: vec!["**/*.md".to_string()],
6950 exclude: Vec::new(),
6951 kind: TargetKind::Docs,
6952 }],
6953 local_ai: Default::default(),
6954 }
6955 }
6956
6957 fn source_config(root: PathBuf, language: Language) -> Config {
6958 Config {
6959 root: root.clone(),
6960 database: root.join(".rag-rat/index.sqlite"),
6961 targets: vec![ResolvedTarget {
6962 name: language.as_str().to_string(),
6963 language,
6964 directories: vec![PathBuf::from("src")],
6965 include: vec!["src/".to_string()],
6966 exclude: Vec::new(),
6967 kind: TargetKind::Source,
6968 }],
6969 local_ai: Default::default(),
6970 }
6971 }
6972
6973 fn assert_edge(db: &IndexDatabase, from: &str, to: &str, edge_kind: &str, confidence: &str) {
6974 let count = db
6975 .storage
6976 .connection()
6977 .query_row(
6978 "
6979 SELECT COUNT(*)
6980 FROM edges
6981 WHERE edge_kind = ?1
6982 AND confidence = ?2
6983 AND COALESCE(from_name, '') LIKE ?3
6984 AND to_name LIKE ?4
6985 ",
6986 params![edge_kind, confidence, format!("%{from}%"), format!("%{to}%")],
6987 |row| row.get::<_, i64>(0),
6988 )
6989 .unwrap();
6990 assert!(count > 0, "missing edge {from} -[{edge_kind}/{confidence}]-> {to}");
6991 }
6992
6993 fn table_count(db: &IndexDatabase, table: &str) -> i64 {
6994 db.storage
6995 .connection()
6996 .query_row("SELECT COUNT(*) FROM sqlite_master WHERE name = ?1", [table], |row| {
6997 row.get(0)
6998 })
6999 .unwrap()
7000 }
7001
7002 fn row_count(db: &IndexDatabase, table: &str) -> i64 {
7003 db.storage
7004 .connection()
7005 .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| row.get(0))
7006 .unwrap()
7007 }
7008
7009 fn chunk_columns(db: &IndexDatabase) -> Vec<String> {
7010 table_columns(db, "chunks")
7011 }
7012
7013 fn file_columns(db: &IndexDatabase) -> Vec<String> {
7014 table_columns(db, "files")
7015 }
7016
7017 fn table_columns(db: &IndexDatabase, table: &str) -> Vec<String> {
7018 let mut stmt =
7019 db.storage.connection().prepare(&format!("PRAGMA table_info({table})")).unwrap();
7020 stmt.query_map([], |row| row.get::<_, String>(1)).unwrap().map(Result::unwrap).collect()
7021 }
7022
7023 fn indexed_revision_count(db: &IndexDatabase) -> i64 {
7024 db.storage
7025 .connection()
7026 .query_row("SELECT COUNT(*) FROM files WHERE indexed_revision != ''", [], |row| {
7027 row.get(0)
7028 })
7029 .unwrap()
7030 }
7031
7032 fn chunk_source_revision_count(db: &IndexDatabase) -> i64 {
7033 db.storage
7034 .connection()
7035 .query_row("SELECT COUNT(*) FROM chunks WHERE source_revision != ''", [], |row| {
7036 row.get(0)
7037 })
7038 .unwrap()
7039 }
7040
7041 fn first_chunk_id(db: &IndexDatabase) -> i64 {
7042 db.storage
7043 .connection()
7044 .query_row("SELECT id FROM chunks ORDER BY id LIMIT 1", [], |row| row.get(0))
7045 .unwrap()
7046 }
7047
/// Runs `git` with `args` inside `root` and panics unless it exits successfully.
///
/// The panic message includes the arguments plus the captured stdout and stderr, so a
/// failing fixture step can be diagnosed from the test output alone.
fn run_git(root: &Path, args: &[&str]) {
    let mut command = Command::new("git");
    command.args(args).current_dir(root);
    let result = command.output().unwrap();
    if !result.status.success() {
        panic!(
            "git {:?} failed\nstdout:\n{}\nstderr:\n{}",
            args,
            String::from_utf8_lossy(&result.stdout),
            String::from_utf8_lossy(&result.stderr)
        );
    }
}
7058
7059 struct MockGitHubClient;
7060
7061 impl github::GitHubClient for MockGitHubClient {
7062 fn issue(
7063 &self,
7064 owner: &str,
7065 repo: &str,
7066 number: i64,
7067 ) -> anyhow::Result<github::GitHubIssue> {
7068 Ok(github::GitHubIssue {
7069 owner: owner.to_string(),
7070 repo: repo.to_string(),
7071 number,
7072 html_url: format!("https://github.com/{owner}/{repo}/issues/{number}"),
7073 state: "open".to_string(),
7074 title: "Decision: keep sqlite".to_string(),
7075 body: "We decided sqlite is required for binary size.".to_string(),
7076 author: Some("octo".to_string()),
7077 created_at: Some("2026-01-01T00:00:00Z".to_string()),
7078 updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7079 is_pull_request: true,
7080 })
7081 }
7082
7083 fn issue_comments(
7084 &self,
7085 owner: &str,
7086 repo: &str,
7087 number: i64,
7088 ) -> anyhow::Result<Vec<github::GitHubComment>> {
7089 Ok(vec![github::GitHubComment {
7090 id: 4201,
7091 owner: owner.to_string(),
7092 repo: repo.to_string(),
7093 number,
7094 html_url: format!("https://github.com/{owner}/{repo}/issues/{number}#comment-1"),
7095 body: "Rejected alternative: duckdb was too large.".to_string(),
7096 author: Some("octo".to_string()),
7097 created_at: Some("2026-01-01T01:00:00Z".to_string()),
7098 updated_at: Some("2026-01-01T01:00:00Z".to_string()),
7099 }])
7100 }
7101
7102 fn pull(
7103 &self,
7104 owner: &str,
7105 repo: &str,
7106 number: i64,
7107 ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7108 Ok(Some(github::GitHubPullRequest {
7109 owner: owner.to_string(),
7110 repo: repo.to_string(),
7111 number,
7112 html_url: format!("https://github.com/{owner}/{repo}/pull/{number}"),
7113 state: "open".to_string(),
7114 title: "Use sqlite".to_string(),
7115 body: "Constraint: normal queries must use cache only.".to_string(),
7116 author: Some("octo".to_string()),
7117 created_at: Some("2026-01-01T00:00:00Z".to_string()),
7118 updated_at: Some("2026-01-02T00:00:00Z".to_string()),
7119 merged_at: None,
7120 }))
7121 }
7122
7123 fn pull_reviews(
7124 &self,
7125 owner: &str,
7126 repo: &str,
7127 number: i64,
7128 ) -> anyhow::Result<Vec<github::GitHubReview>> {
7129 Ok(vec![github::GitHubReview {
7130 id: 4202,
7131 owner: owner.to_string(),
7132 repo: repo.to_string(),
7133 number,
7134 html_url: Some(format!("https://github.com/{owner}/{repo}/pull/{number}#review")),
7135 state: "COMMENTED".to_string(),
7136 body: "Risk: live crawling during search would be surprising.".to_string(),
7137 author: Some("reviewer".to_string()),
7138 submitted_at: Some("2026-01-01T02:00:00Z".to_string()),
7139 }])
7140 }
7141
7142 fn pull_review_comments(
7143 &self,
7144 owner: &str,
7145 repo: &str,
7146 number: i64,
7147 ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7148 Ok(vec![github::GitHubReviewComment {
7149 id: 4203,
7150 owner: owner.to_string(),
7151 repo: repo.to_string(),
7152 number,
7153 path: Some("docs/search.md".to_string()),
7154 html_url: format!("https://github.com/{owner}/{repo}/pull/{number}#discussion"),
7155 body: "No longer use obsolete duckdb rationale.".to_string(),
7156 author: Some("reviewer".to_string()),
7157 created_at: Some("2026-01-01T03:00:00Z".to_string()),
7158 updated_at: Some("2026-01-01T03:00:00Z".to_string()),
7159 }])
7160 }
7161 }
7162
7163 struct PartiallyFailingGitHubClient;
7164
7165 impl github::GitHubClient for PartiallyFailingGitHubClient {
7166 fn issue(
7167 &self,
7168 owner: &str,
7169 repo: &str,
7170 number: i64,
7171 ) -> anyhow::Result<github::GitHubIssue> {
7172 if number == 404 {
7173 anyhow::bail!("gh: Not Found (HTTP 404)");
7174 }
7175 MockGitHubClient.issue(owner, repo, number)
7176 }
7177
7178 fn issue_comments(
7179 &self,
7180 owner: &str,
7181 repo: &str,
7182 number: i64,
7183 ) -> anyhow::Result<Vec<github::GitHubComment>> {
7184 MockGitHubClient.issue_comments(owner, repo, number)
7185 }
7186
7187 fn pull(
7188 &self,
7189 owner: &str,
7190 repo: &str,
7191 number: i64,
7192 ) -> anyhow::Result<Option<github::GitHubPullRequest>> {
7193 MockGitHubClient.pull(owner, repo, number)
7194 }
7195
7196 fn pull_reviews(
7197 &self,
7198 owner: &str,
7199 repo: &str,
7200 number: i64,
7201 ) -> anyhow::Result<Vec<github::GitHubReview>> {
7202 MockGitHubClient.pull_reviews(owner, repo, number)
7203 }
7204
7205 fn pull_review_comments(
7206 &self,
7207 owner: &str,
7208 repo: &str,
7209 number: i64,
7210 ) -> anyhow::Result<Vec<github::GitHubReviewComment>> {
7211 MockGitHubClient.pull_review_comments(owner, repo, number)
7212 }
7213 }
7214}