use super::*;
impl IndexDatabase {
/// Assembles an [`IndexStatus`] snapshot for the index stored at `database`.
///
/// The snapshot combines:
/// - per-language file counts read from the `files` table,
/// - metadata values (`git_commit`, `git_dirty`, `indexed_at_ms`, `fts_synced_at_ms`,
///   `fts_source_revision`) read through `self.meta`,
/// - FTS freshness: the index is reported fresh only when it is not dirty and the stored
///   `fts_source_revision` equals the current content revision,
/// - parser-failure, git-history, GitHub and local-AI sub-statuses.
///
/// `database` is only used for display and for the `exists` check.
///
/// # Errors
/// Any failing query or sub-status lookup is propagated, except the anchor-health counts,
/// which fall back to their default value on error.
pub fn status(&self, database: &Path) -> anyhow::Result<IndexStatus> {
    let conn = self.storage.connection();
    let mut language_query =
        conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language ORDER BY language")?;
    // Negative counts cannot be represented as u64; they are clamped to zero.
    let file_count_by_language = language_query
        .query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)))?
        .map(|entry| entry.map(|(language, total)| (language, u64::try_from(total).unwrap_or(0))))
        .collect::<Result<BTreeMap<_, _>, _>>()?;

    let content_revision = self.content_revision()?;
    let fts_source_revision = self.meta("fts_source_revision")?;
    let fts_dirty = self.fts_dirty()?;
    // Fresh means: nothing pending AND the FTS index was built from the current revision.
    let fts_fresh =
        !fts_dirty && fts_source_revision.as_deref() == Some(content_revision.as_str());

    let status = IndexStatus {
        database: database.display().to_string(),
        exists: database.exists(),
        schema: schema::status(conn)?,
        git_commit: self.meta("git_commit")?,
        git_dirty: self.meta("git_dirty")?.map(|flag| flag == "true"),
        // Unparseable timestamps are reported as absent rather than as errors.
        indexed_at_ms: self.meta("indexed_at_ms")?.and_then(|raw| raw.parse::<i64>().ok()),
        content_revision,
        fts_synced_at_ms: self.meta("fts_synced_at_ms")?.and_then(|raw| raw.parse::<i64>().ok()),
        fts_dirty,
        fts_fresh,
        fts_source_revision,
        file_count_by_language,
        parser_failures: self.parser_failure_count()?,
        parser_failure_paths: self.parser_failure_paths()?,
        git_history: self.git_history_status()?,
        github: self.github_status()?,
        local_ai: self.local_ai_status()?,
        // Best effort: a failure here must not make the whole status call fail.
        anchor_health: crate::query::memory::anchor_health_counts(conn).unwrap_or_default(),
    };
    Ok(status)
}
/// Returns the memory-anchor health counts for this index.
///
/// Forwards to `crate::query::memory::anchor_health_counts` on the storage connection and
/// propagates its error unchanged.
pub fn memory_anchor_health(&self) -> anyhow::Result<AnchorHealth> {
crate::query::memory::anchor_health_counts(self.storage.connection())
}
/// Returns the status reported by the underlying storage layer (`self.storage.status()`).
pub fn storage_status(&self) -> anyhow::Result<StorageStatus> {
self.storage.status()
}
/// Summarises how the files discovered under `config` compare with the index.
///
/// Builds a discovery plan via `discovery_plan` and reports the discovered/indexed totals,
/// how many files are unindexed (overall and of kind `TargetKind::Source`), how many indexed
/// files changed or were removed, and a sample of at most ten unindexed paths.
///
/// `warning` is set only when at least one unindexed source file exists.
pub fn discovery_status(&self, config: &Config) -> anyhow::Result<DiscoveryStatus> {
    let plan = discovery_plan(self.storage.connection(), config)?;

    let mut unindexed_source_files = 0usize;
    for candidate in plan.unindexed.iter() {
        if candidate.kind == TargetKind::Source {
            unindexed_source_files += 1;
        }
    }

    // The sample is drawn from all unindexed files, not only source files.
    let unindexed_sample = plan
        .unindexed
        .iter()
        .take(10)
        .map(|candidate| path_string(&candidate.relative_path))
        .collect();

    let warning = if unindexed_source_files > 0 {
        Some(format!(
            "{unindexed_source_files} unindexed source files detected. Run `rag-rat index \
             --full` or `rag-rat index --discover`."
        ))
    } else {
        None
    };

    Ok(DiscoveryStatus {
        discovered_files: plan.discovered_files,
        indexed_files: plan.indexed_files,
        unindexed_files: plan.unindexed.len(),
        unindexed_source_files,
        changed_indexed_files: plan.changed.len(),
        removed_indexed_files: plan.deleted.len(),
        unindexed_sample,
        warning,
    })
}
/// Searches the index for `query`, returning at most `limit` hits.
///
/// Convenience wrapper around `search_with_graph_meta` using `GraphMetaMode::Compact` and a
/// graph limit of 3.
pub fn search(
&self,
query: &str,
limit: u32,
include_generated: bool,
) -> anyhow::Result<Vec<SearchHit>> {
self.search_with_graph_meta(query, limit, include_generated, GraphMetaMode::Compact, 3)
}
/// Explain-mode counterpart of `search`.
///
/// Convenience wrapper around `search_explain_with_graph_meta` using
/// `GraphMetaMode::Compact` and a graph limit of 3.
pub fn search_explain(
&self,
query: &str,
limit: u32,
include_generated: bool,
) -> anyhow::Result<Vec<SearchHit>> {
self.search_explain_with_graph_meta(
query,
limit,
include_generated,
GraphMetaMode::Compact,
3,
)
}
/// Searches with caller-chosen graph metadata settings and default `SearchOptions`.
///
/// Forwards to `search_with_graph_meta_options` with `SearchOptions::default()`.
pub fn search_with_graph_meta(
&self,
query: &str,
limit: u32,
include_generated: bool,
graph_mode: GraphMetaMode,
graph_limit: u32,
) -> anyhow::Result<Vec<SearchHit>> {
self.search_with_graph_meta_options(
query,
limit,
include_generated,
graph_mode,
graph_limit,
SearchOptions::default(),
)
}
/// Runs a search with explicit graph-metadata settings and `SearchOptions`.
///
/// Steps, in order:
/// 1. make sure the FTS index is fresh (`ensure_fts_fresh`),
/// 2. run `search_with_heal` (fourth and fifth arguments are `true` and `false`; the
///    explain-mode sibling passes `true` for the fifth),
/// 3. attach graph metadata to every hit according to `graph_mode` / `graph_limit`.
pub fn search_with_graph_meta_options(
    &self,
    query: &str,
    limit: u32,
    include_generated: bool,
    graph_mode: GraphMetaMode,
    graph_limit: u32,
    options: SearchOptions,
) -> anyhow::Result<Vec<SearchHit>> {
    self.ensure_fts_fresh()?;
    let mut results =
        self.search_with_heal(query, limit, include_generated, true, false, options)?;
    let conn = self.storage.connection();
    graph_meta::attach_to_search_hits(conn, &mut results, graph_mode, graph_limit)?;
    Ok(results)
}
/// Explain-mode search with caller-chosen graph metadata settings and default options.
///
/// Forwards to `search_explain_with_graph_meta_options` with `SearchOptions::default()`.
pub fn search_explain_with_graph_meta(
&self,
query: &str,
limit: u32,
include_generated: bool,
graph_mode: GraphMetaMode,
graph_limit: u32,
) -> anyhow::Result<Vec<SearchHit>> {
self.search_explain_with_graph_meta_options(
query,
limit,
include_generated,
graph_mode,
graph_limit,
SearchOptions::default(),
)
}
/// Explain-mode search with explicit graph-metadata settings and `SearchOptions`.
///
/// Identical to `search_with_graph_meta_options` except that the fifth argument passed to
/// `search_with_heal` is `true` (presumably the "explain" switch — confirm against
/// `search_with_heal`).
pub fn search_explain_with_graph_meta_options(
    &self,
    query: &str,
    limit: u32,
    include_generated: bool,
    graph_mode: GraphMetaMode,
    graph_limit: u32,
    options: SearchOptions,
) -> anyhow::Result<Vec<SearchHit>> {
    self.ensure_fts_fresh()?;
    let mut results =
        self.search_with_heal(query, limit, include_generated, true, true, options)?;
    let conn = self.storage.connection();
    graph_meta::attach_to_search_hits(conn, &mut results, graph_mode, graph_limit)?;
    Ok(results)
}
/// Looks up symbols by `name`, optionally restricted to `language`, returning at most
/// `limit` hits. Forwards to `crate::query::symbol::lookup`.
pub fn symbols(
&self,
name: &str,
language: Option<Language>,
limit: u32,
) -> anyhow::Result<Vec<crate::query::symbol::SymbolHit>> {
crate::query::symbol::lookup(self.storage.connection(), name, language, limit)
}
/// Returns the candidate symbols matching `selector`.
/// Forwards to `crate::query::symbol::lookup_candidates`.
pub fn symbol_candidates(
&self,
selector: &crate::query::symbol::SymbolSelector,
) -> anyhow::Result<crate::query::symbol::SymbolLookup> {
crate::query::symbol::lookup_candidates(self.storage.connection(), selector)
}
/// Resolves `selector` to a single symbol via `crate::query::symbol::select_one`.
///
/// The result is nested: the outer `anyhow::Result` carries lookup failures, while the inner
/// `Result` is either `Ok(Option<SymbolHit>)` or `Err(SymbolDisambiguation)`.
pub fn select_symbol(
&self,
selector: &crate::query::symbol::SymbolSelector,
) -> anyhow::Result<
Result<Option<crate::query::symbol::SymbolHit>, crate::query::symbol::SymbolDisambiguation>,
> {
crate::query::symbol::select_one(self.storage.connection(), selector)
}
/// Reads chunk `chunk_id` with full graph metadata (`GraphMetaMode::Full`, graph limit 20)
/// and with memories attached. Returns `Ok(None)` when the chunk is not found.
pub fn read_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
self.read_chunk_with_graph_and_memories(chunk_id, GraphMetaMode::Full, 20, true)
}
/// Reads chunk `chunk_id` with the requested graph metadata but without memories
/// (`include_memories` is passed as `false`).
pub fn read_chunk_with_graph(
&self,
chunk_id: i64,
graph_mode: GraphMetaMode,
graph_limit: u32,
) -> anyhow::Result<Option<crate::query::ReadChunk>> {
self.read_chunk_with_graph_and_memories(chunk_id, graph_mode, graph_limit, false)
}
/// Reads chunk `chunk_id`, validated against the current source, and decorates it.
///
/// Graph metadata is always attached using `graph_mode` / `graph_limit`. When
/// `include_memories` is set, up to 20 memories for the chunk are loaded into
/// `chunk.memories`.
///
/// Returns `Ok(None)` when `read_chunk_current` finds no such chunk.
pub fn read_chunk_with_graph_and_memories(
    &self,
    chunk_id: i64,
    graph_mode: GraphMetaMode,
    graph_limit: u32,
    include_memories: bool,
) -> anyhow::Result<Option<crate::query::ReadChunk>> {
    let mut chunk = match self.read_chunk_current(chunk_id)? {
        Some(found) => found,
        None => return Ok(None),
    };
    let conn = self.storage.connection();
    graph_meta::attach_to_read_chunk(conn, &mut chunk, graph_mode, graph_limit)?;
    if include_memories {
        let memories = crate::query::memory::memories_for_chunk(conn, chunk_id, 20)?;
        chunk.memories = memories;
    }
    Ok(Some(chunk))
}
/// Reads chunk `chunk_id` from the index and reconciles it with the file on disk.
///
/// Behaviour:
/// - Chunk not in the index: `Ok(None)`.
/// - No `source_root` configured: the stored chunk is returned unchanged.
/// - Source file cannot be read as UTF-8 text (any I/O or decoding error): the file is
///   marked deleted, FTS is re-synced and `IndexError::Gone` is returned.
/// - Otherwise the stored anchor is validated against the current text:
///   - `Exact`: the chunk text is refreshed from the current lines when they can be sliced.
///   - `Relocated`: line range and text are replaced with the relocated values.
///   - `Stale`: the file is healed, FTS is re-synced and the chunk is re-read; if it no
///     longer exists `IndexError::StaleChunk` is returned.
fn read_chunk_current(&self, chunk_id: i64) -> anyhow::Result<Option<crate::query::ReadChunk>> {
    let Some(mut chunk) = crate::query::read_chunk(self.storage.connection(), chunk_id)? else {
        return Ok(None);
    };
    let Some(root) = self.storage.source_root() else {
        return Ok(Some(chunk));
    };
    let source_path = root.join(&chunk.path);
    let current_text = match fs::read_to_string(&source_path) {
        Ok(text) => text,
        Err(_) => {
            // Every read failure is treated as "file gone", not only a missing file.
            self.mark_file_deleted(Path::new(&chunk.path))?;
            self.sync_fts()?;
            anyhow::bail!(IndexError::Gone { chunk_id });
        },
    };
    let anchor = self.chunk_anchor(chunk_id)?;
    // Line numbers that do not fit in usize (e.g. negative) fall back to line 1.
    let indexed_start = usize::try_from(chunk.start_line).unwrap_or(1);
    let indexed_end = usize::try_from(chunk.end_line).unwrap_or(1);
    let status =
        anchors::validate(&chunk.text, indexed_start, indexed_end, &anchor, &current_text);
    match status {
        AnchorStatus::Exact => {
            if let Some(text) = anchors::slice_lines(&current_text, indexed_start, indexed_end) {
                chunk.text = text;
            }
            Ok(Some(chunk))
        },
        AnchorStatus::Relocated { start_line, end_line, text } => {
            chunk.start_line = i64::try_from(start_line)?;
            chunk.end_line = i64::try_from(end_line)?;
            chunk.text = text;
            Ok(Some(chunk))
        },
        AnchorStatus::Stale => {
            self.heal_file(Path::new(&chunk.path))?;
            self.sync_fts()?;
            let healed = crate::query::read_chunk(self.storage.connection(), chunk_id)?;
            match healed {
                Some(chunk) => Ok(Some(chunk)),
                // `chunk` here is the pre-heal chunk; its path identifies the stale file.
                None => anyhow::bail!(IndexError::StaleChunk { chunk_id, path: chunk.path }),
            }
        },
    }
}
/// Runs the hash-baseline lexical search after ensuring the FTS index is fresh.
/// Forwards to `crate::search::lexical::search_hash_baseline`.
pub fn search_hash_baseline(
&self,
query: &str,
limit: u32,
include_generated: bool,
) -> anyhow::Result<Vec<SearchHit>> {
self.ensure_fts_fresh()?;
crate::search::lexical::search_hash_baseline(
self.storage.connection(),
query,
limit,
include_generated,
)
}
/// Finds documentation hits for `symbol` by running a plain `search` on the symbol text with
/// generated files included (`include_generated = true`).
pub fn docs_for_symbol(&self, symbol: &str, limit: u32) -> anyhow::Result<Vec<SearchHit>> {
self.search(symbol, limit, true)
}
/// Collects documentation hits for an already-resolved `symbol`.
///
/// Local context hits from the symbol's own file come first, followed by a wider search on
/// the symbol name (four times `limit`, saturating) with generated files included. The
/// combined list is ranked with `rank_docs_for_symbol`, de-duplicated, and cut to `limit`.
pub fn docs_for_selected_symbol(
    &self,
    symbol: &crate::query::symbol::SymbolHit,
    limit: u32,
) -> anyhow::Result<Vec<SearchHit>> {
    let widened_limit = limit.saturating_mul(4).max(limit);
    let keep = usize::try_from(limit).unwrap_or(usize::MAX);

    let mut combined = self.local_symbol_context_hits(symbol, limit)?;
    let global_hits = self.search(&symbol.name, widened_limit, true)?;
    combined.extend(global_hits);

    rank_docs_for_symbol(symbol, &mut combined);
    dedupe_search_hits(&mut combined);
    combined.truncate(keep);
    Ok(combined)
}
/// Searches indexed commits for `query`, returning at most `limit` hits.
/// Forwards to `git_history::commit_search`.
pub fn commit_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<CommitSearchHit>> {
git_history::commit_search(self.storage.connection(), query, limit)
}
/// Returns up to `limit` history entries for `path`.
/// Forwards to `git_history::history_for_path`.
pub fn git_history_for_path(
&self,
path: &str,
limit: u32,
) -> anyhow::Result<Vec<PathHistoryItem>> {
git_history::history_for_path(self.storage.connection(), path, limit)
}
/// Returns history entries for the files that define symbols matching `symbol`.
///
/// Each matching symbol (at most `limit`, optionally filtered by `language`) contributes the
/// path-level history of its file; every entry is tagged with the symbol's name, qualified
/// name, path and byte range. History is per file, not per symbol body.
///
/// Collection stops as soon as the number of gathered items reaches `limit`. The check runs
/// after each push, so the per-path query always asks for at least one entry.
pub fn git_history_for_symbol(
    &self,
    symbol: &str,
    language: Option<Language>,
    limit: u32,
) -> anyhow::Result<Vec<SymbolHistoryItem>> {
    let matches = self.symbols(symbol, language, limit)?;
    let per_symbol_limit = limit.max(1);
    let cap = usize::try_from(limit).unwrap_or(usize::MAX);

    let mut items = Vec::new();
    for hit in matches {
        let commits = self.git_history_for_path(&hit.path, per_symbol_limit)?;
        for commit in commits {
            items.push(SymbolHistoryItem {
                symbol: hit.name.clone(),
                qualified_name: hit.qualified_name.clone(),
                path: hit.path.clone(),
                start_byte: hit.start_byte,
                end_byte: hit.end_byte,
                commit,
                evidence_kind: "historical",
            });
            if items.len() >= cap {
                return Ok(items);
            }
        }
    }
    Ok(items)
}
/// Finds commits related to `query`.
///
/// First runs a regular `search` (generated files included) to obtain the current hits, then
/// hands those hits together with `query` and `limit` to
/// `git_history::commits_touching_query`.
pub fn commits_touching_query(
    &self,
    query: &str,
    limit: u32,
) -> anyhow::Result<Vec<QueryCommitHit>> {
    let current_hits = self.search(query, limit, true)?;
    git_history::commits_touching_query(self.storage.connection(), query, limit, &current_hits)
}
/// Summarises `git blame` information for chunk `chunk_id`.
///
/// Returns `Ok(None)` when the chunk cannot be read. Otherwise:
/// 1. A cached summary keyed by the chunk id and the hash of its current text is returned
///    if present.
/// 2. Without a `source_root`, an empty summary (zero lines, no commits) is returned and
///    nothing is cached.
/// 3. Otherwise blame lines are collected for the chunk's line range, aggregated into
///    per-commit line counts plus dominant/newest/oldest commit, stored via
///    `git_history::store_blame`, and returned.
pub fn git_blame_chunk(&self, chunk_id: i64) -> anyhow::Result<Option<ChunkBlameSummary>> {
let Some(chunk) = self.read_chunk(chunk_id)? else {
return Ok(None);
};
// The cache key includes the text hash, so an edited chunk misses the cache.
let source_text_hash = git_history::source_text_hash(&chunk.text);
if let Some(cached) =
git_history::cached_blame(self.storage.connection(), chunk_id, &source_text_hash)?
{
return Ok(Some(cached));
}
let Some(root) = self.storage.source_root() else {
// No working tree to blame against: report an empty summary.
return Ok(Some(ChunkBlameSummary {
chunk_id,
path: chunk.path,
start_line: chunk.start_line,
end_line: chunk.end_line,
source_text_hash,
line_count: 0,
dominant_commit: None,
dominant_commit_lines: 0,
newest_commit: None,
newest_commit_time_s: None,
oldest_commit: None,
oldest_commit_time_s: None,
commit_counts: BTreeMap::new(),
evidence_kind: "historical",
}));
};
let blame_lines =
git_history::blame_lines(root, &chunk.path, chunk.start_line, chunk.end_line);
let mut counts = BTreeMap::<String, i64>::new();
let mut newest = None::<(String, i64)>;
let mut oldest = None::<(String, i64)>;
for line in &blame_lines {
*counts.entry(line.commit.clone()).or_default() += 1;
// Lines without an author time still count towards `counts` but not newest/oldest.
if let Some(time) = line.author_time_s {
if newest.as_ref().is_none_or(|(_, newest_time)| time > *newest_time) {
newest = Some((line.commit.clone(), time));
}
if oldest.as_ref().is_none_or(|(_, oldest_time)| time < *oldest_time) {
oldest = Some((line.commit.clone(), time));
}
}
}
// Dominant commit = most lines; ties are broken by the greater commit string.
let dominant = counts
.iter()
.max_by_key(|(commit, count)| (*count, *commit))
.map(|(commit, count)| (commit.clone(), *count));
let summary = ChunkBlameSummary {
chunk_id,
path: chunk.path,
start_line: chunk.start_line,
end_line: chunk.end_line,
source_text_hash,
line_count: i64::try_from(blame_lines.len()).unwrap_or(i64::MAX),
dominant_commit: dominant.as_ref().map(|(commit, _)| commit.clone()),
dominant_commit_lines: dominant.map(|(_, count)| count).unwrap_or(0),
newest_commit: newest.as_ref().map(|(commit, _)| commit.clone()),
newest_commit_time_s: newest.as_ref().map(|(_, time)| *time),
oldest_commit: oldest.as_ref().map(|(commit, _)| commit.clone()),
oldest_commit_time_s: oldest.as_ref().map(|(_, time)| *time),
commit_counts: counts,
evidence_kind: "historical",
};
// NOTE(review): a summary built from zero blame lines is cached as well — confirm that
// is intended when blame produces no output.
git_history::store_blame(self.storage.connection(), &summary)?;
Ok(Some(summary))
}
/// Syncs GitHub data from refs without progress reporting.
/// Calls `github_sync_from_refs_with_progress` with a no-op progress callback.
pub fn github_sync_from_refs(&self, offline: bool) -> anyhow::Result<GitHubSyncReport> {
self.github_sync_from_refs_with_progress(offline, |_| {})
}
pub fn github_sync_from_refs_with_progress(
&self,
offline: bool,
progress: impl FnMut(github::GitHubSyncProgress),
) -> anyhow::Result<GitHubSyncReport> {
let Some(root) = self.storage.source_root() else {
anyhow::bail!("index has no source_root metadata; rebuild required");
};
if offline {
github::sync_from_refs::<github::GhCliGitHubClient>(
self.storage.connection(),
root,
None,
true,
&self.github,
)
} else {
let client = github::GhCliGitHubClient;
github::sync_from_refs_with_progress(
self.storage.connection(),
root,
Some(&client),
false,
&self.github,
progress,
)
}
}
/// Syncs a single GitHub issue identified by `issue_ref`.
///
/// Online, a `GhCliGitHubClient` is passed to `github::sync_issue`; offline, the same
/// function is called with no client and the offline flag set.
pub fn github_sync_issue(
    &self,
    issue_ref: &str,
    offline: bool,
) -> anyhow::Result<GitHubSyncReport> {
    let conn = self.storage.connection();
    if !offline {
        let client = github::GhCliGitHubClient;
        return github::sync_issue(conn, issue_ref, Some(&client), false, &self.github);
    }
    // The turbofish is required because `None` alone cannot pin the client type.
    github::sync_issue::<github::GhCliGitHubClient>(conn, issue_ref, None, true, &self.github)
}
/// Searches synced GitHub issues for `query`, returning at most `limit` pieces of evidence.
/// Forwards to `github::issue_search`.
pub fn github_issue_search(
&self,
query: &str,
limit: u32,
) -> anyhow::Result<Vec<GitHubEvidence>> {
github::issue_search(self.storage.connection(), query, limit)
}
/// Searches for rationale evidence matching `query`.
/// Forwards to `github::rationale_search` with this database's GitHub configuration.
pub fn rationale_search(&self, query: &str, limit: u32) -> anyhow::Result<Vec<GitHubEvidence>> {
github::rationale_search(self.storage.connection(), query, limit, &self.github)
}
/// Returns up to `limit` GitHub references associated with `path`.
/// Forwards to `github::refs_for_path`.
pub fn github_refs_for_path(
&self,
path: &str,
limit: u32,
) -> anyhow::Result<Vec<github::GitHubRef>> {
github::refs_for_path(self.storage.connection(), path, limit)
}
/// Public accessor for the GitHub sync status; returns `self.github_status()`.
pub fn github_sync_status(&self) -> anyhow::Result<GitHubStatus> {
self.github_status()
}
/// Builds the papertrail for chunk `chunk_id`.
///
/// The chunk is loaded through `read_chunk`; when it does not exist the result is
/// `Ok(None)`. Otherwise `github::papertrail_for_chunk` is called with `limit` and this
/// database's GitHub configuration.
pub fn papertrail_for_chunk(
    &self,
    chunk_id: i64,
    limit: u32,
) -> anyhow::Result<Option<Papertrail>> {
    match self.read_chunk(chunk_id)? {
        None => Ok(None),
        Some(chunk) => {
            let conn = self.storage.connection();
            let trail = github::papertrail_for_chunk(conn, &chunk, limit, &self.github)?;
            Ok(Some(trail))
        },
    }
}
/// Builds the papertrail for the first symbol matching `symbol`.
///
/// Symbols are looked up with `self.symbols(symbol, language, limit)` and only the first
/// hit is used; when the lookup yields nothing the result is `Ok(None)`. `limit` is used
/// both for the symbol lookup and for the papertrail itself.
pub fn papertrail_for_symbol(
    &self,
    symbol: &str,
    language: Option<Language>,
    limit: u32,
) -> anyhow::Result<Option<Papertrail>> {
    let first_hit = self.symbols(symbol, language, limit)?.into_iter().next();
    match first_hit {
        None => Ok(None),
        Some(hit) => {
            let conn = self.storage.connection();
            let trail = github::papertrail_for_symbol(conn, &hit, limit, &self.github)?;
            Ok(Some(trail))
        },
    }
}
/// Builds the papertrail for an already-resolved `symbol`.
/// Forwards to `github::papertrail_for_symbol`.
pub fn papertrail_for_selected_symbol(
&self,
symbol: &crate::query::symbol::SymbolHit,
limit: u32,
) -> anyhow::Result<Papertrail> {
github::papertrail_for_symbol(self.storage.connection(), symbol, limit, &self.github)
}
/// Builds the papertrail for the commit identified by `commit_hash`.
/// Forwards to `github::papertrail_for_commit`.
pub fn papertrail_for_commit(
&self,
commit_hash: &str,
limit: u32,
) -> anyhow::Result<Papertrail> {
github::papertrail_for_commit(self.storage.connection(), commit_hash, limit, &self.github)
}
/// Returns the local-AI status reported by `ai::status`.
pub fn local_ai_status(&self) -> anyhow::Result<LocalAiStatus> {
ai::status(self.storage.connection())
}
/// Lists the models reported by `ai::models`.
pub fn list_models(&self) -> anyhow::Result<Vec<ModelInfo>> {
ai::models(self.storage.connection())
}
/// Installs the model `model_id` via `ai::install_model` and returns its `ModelInfo`.
pub fn install_model(&self, model_id: &str) -> anyhow::Result<ModelInfo> {
ai::install_model(self.storage.connection(), model_id)
}
/// Runs `ai::reconcile` with the optional `limit` and `batch_size`.
pub fn reconcile(
&self,
limit: Option<u32>,
batch_size: Option<u32>,
) -> anyhow::Result<ReconcileReport> {
ai::reconcile(self.storage.connection(), limit, batch_size)
}
/// Returns the reconcile plan computed by `ai::reconcile_plan`.
pub fn reconcile_plan(&self) -> anyhow::Result<ReconcilePlan> {
ai::reconcile_plan(self.storage.connection())
}
/// Runs `ai::reconcile_with_progress`, forwarding `limit`, `batch_size`, `force` and the
/// `progress` callback unchanged.
pub fn reconcile_with_progress(
&self,
limit: Option<u32>,
batch_size: Option<u32>,
force: bool,
progress: impl FnMut(ai::ReconcileProgress),
) -> anyhow::Result<ReconcileReport> {
ai::reconcile_with_progress(self.storage.connection(), limit, batch_size, force, progress)
}
/// Runs `ai::reconcile_with_options_progress` with a prepared `ReconcileOptions` value and
/// a `progress` callback.
pub fn reconcile_with_options_progress(
&self,
options: ai::ReconcileOptions,
progress: impl FnMut(ai::ReconcileProgress),
) -> anyhow::Result<ReconcileReport> {
ai::reconcile_with_options_progress(self.storage.connection(), options, progress)
}
/// Garbage-collects index rows that belong to no live commit or worktree.
///
/// The live set is the union of the contexts reported by `live_worktree_contexts` for the
/// source root (when there is one) and this database's own active commit and worktree
/// (when non-empty). Both lists are sorted and de-duplicated before being handed to
/// `prune_to_live`.
pub fn gc(&self) -> anyhow::Result<GcReport> {
    /// Sorts `values` and removes duplicates in place.
    fn normalize(values: &mut Vec<String>) {
        values.sort_unstable();
        values.dedup();
    }

    let mut live_commits: Vec<String> = Vec::new();
    let mut live_worktrees: Vec<String> = Vec::new();

    if let Some(root) = self.storage.source_root() {
        let (commits, worktrees) = live_worktree_contexts(root);
        live_commits.extend(commits);
        live_worktrees.extend(worktrees);
    }
    // Always keep the context this database is currently attached to.
    if !self.active_commit_sha.is_empty() {
        live_commits.push(self.active_commit_sha.clone());
    }
    if !self.active_worktree_id.is_empty() {
        live_worktrees.push(self.active_worktree_id.clone());
    }

    normalize(&mut live_commits);
    normalize(&mut live_worktrees);
    self.prune_to_live(&live_commits, &live_worktrees)
}
/// Deletes indexed files that belong to neither a live commit nor a live worktree.
///
/// A file is pruned only when its `commit_sha` is not in `live_commits` AND its
/// `worktree_id` is not in `live_worktrees`. When both lists are empty nothing is deleted
/// and the report has `skipped: true`.
///
/// The returned [`GcReport`] holds the number of `files`/`chunks` rows removed (difference
/// of row counts before and after) and the rows remaining.
pub fn prune_to_live(
&self,
live_commits: &[String],
live_worktrees: &[String],
) -> anyhow::Result<GcReport> {
let conn = self.storage.connection();
let files_before = table_row_count(conn, "files")?;
let chunks_before = table_row_count(conn, "chunks")?;
// With no live context at all every file would match the delete filter; skip instead.
if live_commits.is_empty() && live_worktrees.is_empty() {
return Ok(GcReport {
files_pruned: 0,
chunks_pruned: 0,
files_remaining: files_before,
chunks_remaining: chunks_before,
skipped: true,
});
}
// (Re)create and empty the temp tables holding the live sets and the ids to delete.
conn.execute_batch(
"
CREATE TEMP TABLE IF NOT EXISTS gc_live_commits(sha TEXT PRIMARY KEY);
DELETE FROM temp.gc_live_commits;
CREATE TEMP TABLE IF NOT EXISTS gc_live_worktrees(id TEXT PRIMARY KEY);
DELETE FROM temp.gc_live_worktrees;
CREATE TEMP TABLE IF NOT EXISTS staged_file_ids(id INTEGER PRIMARY KEY);
DELETE FROM temp.staged_file_ids;
",
)?;
// Scoped so the prepared statement is dropped before the next one is prepared.
{
let mut stmt =
conn.prepare("INSERT OR IGNORE INTO temp.gc_live_commits(sha) VALUES (?1)")?;
for sha in live_commits {
stmt.execute([sha])?;
}
}
{
let mut stmt =
conn.prepare("INSERT OR IGNORE INTO temp.gc_live_worktrees(id) VALUES (?1)")?;
for id in live_worktrees {
stmt.execute([id])?;
}
}
// Stage the ids of files that are in neither live set.
conn.execute(
"
INSERT OR IGNORE INTO temp.staged_file_ids(id)
SELECT id FROM main.files
WHERE commit_sha NOT IN (SELECT sha FROM temp.gc_live_commits)
AND worktree_id NOT IN (SELECT id FROM temp.gc_live_worktrees)
",
[],
)?;
// presumably removes the staged files and their dependent rows — confirm against
// `delete_staged_files_cascade`.
self.delete_staged_files_cascade()?;
conn.execute_batch("DELETE FROM temp.staged_file_ids;")?;
let files_remaining = table_row_count(conn, "files")?;
let chunks_remaining = table_row_count(conn, "chunks")?;
Ok(GcReport {
files_pruned: files_before.saturating_sub(files_remaining),
chunks_pruned: chunks_before.saturating_sub(chunks_remaining),
files_remaining,
chunks_remaining,
skipped: false,
})
}
/// Returns the embedding count reported by `ai::current_embedding_count` for `model_id`.
pub fn current_embedding_count(&self, model_id: &str) -> anyhow::Result<u64> {
ai::current_embedding_count(self.storage.connection(), model_id)
}
pub fn heal_index(&self, limit: Option<u32>) -> anyhow::Result<HealIndexReport> {
let Some(root) = self.storage.source_root() else {
anyhow::bail!("heal_index requires source_root metadata; run `rag-rat index` first");
};
let indexed_files = self.indexed_files()?;
let max_repairs = limit.map(usize::try_from).transpose()?.unwrap_or(usize::MAX);
let mut report = HealIndexReport {
checked_files: 0,
healed_files: 0,
removed_files: 0,
skipped_files: 0,
fts_fresh: false,
message: None,
};
for file in indexed_files {
report.checked_files += 1;
let path = Path::new(&file.path);
let full_path = root.join(path);
let Ok(text) = fs::read_to_string(&full_path) else {
if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
>= max_repairs
{
report.message =
Some("limit reached; rerun heal_index to continue".to_string());
break;
}
self.mark_file_deleted(path)?;
report.removed_files += 1;
continue;
};
let sha256 = hex_sha256(text.as_bytes());
if sha256 == file.sha256 {
report.skipped_files += 1;
continue;
}
if usize::try_from(report.healed_files + report.removed_files).unwrap_or(usize::MAX)
>= max_repairs
{
report.message = Some("limit reached; rerun heal_index to continue".to_string());
break;
}
self.heal_file(path)?;
report.healed_files += 1;
}
if report.healed_files > 0 || report.removed_files > 0 {
self.sync_fts()?;
} else {
self.ensure_fts_fresh()?;
}
report.fts_fresh = !self.fts_dirty()?;
Ok(report)
}
/// Returns up to `limit` items from `crate::query::impact::ffi_surface`.
pub fn ffi_surface(&self, limit: u32) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
crate::query::impact::ffi_surface(self.storage.connection(), limit)
}
/// Traverses the call graph from `symbol` towards its callers.
///
/// Forwards to `crate::query::graph::traverse` with the boolean flag set to `true`
/// (presumably "reverse" — confirm against `traverse`).
pub fn find_callers(
&self,
symbol: &str,
limit: u32,
) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
crate::query::graph::traverse(self.storage.connection(), symbol, true, limit)
}
/// Like `find_callers`, but with explicit traversal `options`.
///
/// The options are first passed through `graph_options_with_logical_group`, which may fill
/// in `logical_symbol_id`, and then handed to `traverse_with_options` with the boolean flag
/// set to `true`.
pub fn find_callers_with_options(
&self,
symbol: &str,
limit: u32,
options: &crate::query::graph::GraphTraversalOptions,
) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
let options = self.graph_options_with_logical_group(options)?;
crate::query::graph::traverse_with_options(
self.storage.connection(),
symbol,
true,
limit,
&options,
)
}
/// Traverses the call graph from `symbol` towards its callees.
///
/// Forwards to `crate::query::graph::traverse` with the boolean flag set to `false`
/// (presumably "reverse" — confirm against `traverse`).
pub fn trace_callees(
&self,
symbol: &str,
limit: u32,
) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
crate::query::graph::traverse(self.storage.connection(), symbol, false, limit)
}
/// Like `trace_callees`, but with explicit traversal `options`.
///
/// The options are first passed through `graph_options_with_logical_group`, which may fill
/// in `logical_symbol_id`, and then handed to `traverse_with_options` with the boolean flag
/// set to `false`.
pub fn trace_callees_with_options(
&self,
symbol: &str,
limit: u32,
options: &crate::query::graph::GraphTraversalOptions,
) -> anyhow::Result<Vec<crate::query::graph::GraphHop>> {
let options = self.graph_options_with_logical_group(options)?;
crate::query::graph::traverse_with_options(
self.storage.connection(),
symbol,
false,
limit,
&options,
)
}
/// Produces a full traversal report for an already-resolved `symbol`.
///
/// The report bundles:
/// - the hops returned by `traverse_with_options` for the symbol's qualified name,
/// - the matching `traversal_summary`,
/// - the logical symbol and its variants, when the options resolve to a logical group,
/// - coverage for the symbol's own file plus every file containing a returned callsite.
///
/// When the summary reports unresolved callsites, a note is appended to
/// `coverage.known_index_gaps`. `tool` is echoed back in the report's query section.
pub fn graph_traversal_report(
    &self,
    tool: &str,
    symbol: &crate::query::symbol::SymbolHit,
    reverse: bool,
    limit: u32,
    options: &crate::query::graph::GraphTraversalOptions,
) -> anyhow::Result<crate::query::graph::GraphTraversalReport> {
    let options = self.graph_options_with_logical_group(options)?;
    let conn = self.storage.connection();
    let target = &symbol.qualified_name;

    let results =
        crate::query::graph::traverse_with_options(conn, target, reverse, limit, &options)?;
    let summary = crate::query::graph::traversal_summary(
        conn,
        target,
        reverse,
        limit,
        &options,
        results.len(),
    )?;
    let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;

    // Coverage is computed over the symbol's file and every callsite file.
    let mut paths = BTreeSet::new();
    paths.insert(symbol.path.clone());
    paths.extend(
        results
            .iter()
            .filter_map(|hop| hop.callsite.as_ref())
            .map(|callsite| callsite.path.clone()),
    );

    let mut coverage = self.graph_coverage(paths)?;
    if summary.unresolved > 0 {
        let note = format!(
            "{} unresolved qualified callsites match the requested final segment but are not \
             verified to this symbol",
            summary.unresolved
        );
        coverage.known_index_gaps.push(note);
    }

    let query = crate::query::graph::GraphTraversalQuery {
        tool: tool.to_string(),
        symbol_id: Some(symbol.symbol_id),
        logical_symbol_id: options.logical_symbol_id,
        symbol_path: symbol.qualified_name.clone(),
        resolution: options.resolution_mode.as_str().to_string(),
    };
    Ok(crate::query::graph::GraphTraversalReport {
        query,
        logical_symbol,
        variants,
        summary,
        coverage,
        results,
    })
}
/// Cross-checks graph edges pointing at `symbol` against regex hits for `pattern`.
///
/// Graph edges come from a traversal of the symbol's qualified name with the boolean flag
/// set to `true` (at most `limit` edges); text hits come from `regex_hits`. The two sets are
/// joined on `(path, line)`:
/// - locations present in both become `matched_hits`,
/// - text hits without an edge become `text_only_hits`, classified by
///   `classify_text_only_hit`; those flagged by `is_likely_parser_gap_kind` are also listed
///   in `likely_parser_gaps`,
/// - edges with a callsite but no text hit become `graph_only_edges`; those flagged by
///   `is_likely_false_positive_graph_only` are also listed in `likely_false_positives`.
///
/// With `include_tests == false`, edges whose callsite path is test-like are dropped, and
/// the same flag is passed to `regex_hits`. The report is `complete` when there are neither
/// likely parser gaps nor likely false positives.
///
/// # Errors
/// Fails when `pattern` is not a valid regex or when any lookup fails.
pub fn compare_graph_to_text(
    &self,
    symbol: &crate::query::symbol::SymbolHit,
    pattern: &str,
    limit: u32,
    options: &crate::query::graph::GraphTraversalOptions,
    include_tests: bool,
) -> anyhow::Result<crate::query::graph::CompareGraphTextReport> {
    let regex = Regex::new(pattern)?;
    let options = self.graph_options_with_logical_group(options)?;
    let mut graph_edges = crate::query::graph::traverse_with_options(
        self.storage.connection(),
        &symbol.qualified_name,
        true,
        limit,
        &options,
    )?;
    if !include_tests {
        // Edges without a callsite are kept; only test-like callsites are filtered out.
        graph_edges.retain(|edge| {
            edge.callsite.as_ref().is_none_or(|callsite| !is_test_like_path(&callsite.path))
        });
    }
    let (logical_symbol, variants) = self.graph_logical_symbol(options.logical_symbol_id)?;
    let text_hits = self.regex_hits(pattern, &regex, include_tests)?;
    // Index both sides by (path, line) so they can be joined in either direction.
    let text_by_location = text_hits
        .iter()
        .map(|hit| ((hit.path.clone(), hit.line), hit))
        .collect::<BTreeMap<_, _>>();
    let graph_by_location = graph_edges
        .iter()
        .filter_map(|edge| {
            edge.callsite
                .as_ref()
                .map(|callsite| ((callsite.path.clone(), callsite.line), edge))
        })
        .collect::<BTreeMap<_, _>>();
    // Coverage is reported for every file that appears on either side.
    let mut paths = BTreeSet::new();
    paths.insert(symbol.path.clone());
    for hit in &text_hits {
        paths.insert(hit.path.clone());
    }
    for edge in &graph_edges {
        if let Some(callsite) = &edge.callsite {
            paths.insert(callsite.path.clone());
        }
    }
    let parser_failure_paths = self
        .parser_failure_paths()?
        .into_iter()
        .map(|failure| failure.path)
        .collect::<BTreeSet<_>>();
    let mut matched_hits = Vec::new();
    let mut text_only_hits = Vec::new();
    let mut likely_parser_gaps = Vec::new();
    for hit in &text_hits {
        if let Some(edge) = graph_by_location.get(&(hit.path.clone(), hit.line)) {
            matched_hits.push(crate::query::graph::MatchedGraphTextHit {
                path: hit.path.clone(),
                line: hit.line,
                text: hit.text.clone(),
                target: edge.target.clone(),
                edge_kind: edge.edge_kind.clone(),
                confidence: edge.confidence.clone(),
                resolution: edge.resolution.clone(),
            });
        } else {
            let gap_kind = classify_text_only_hit(&hit.path, &hit.text, &parser_failure_paths);
            let text_only_hit = crate::query::graph::TextOnlyHit {
                path: hit.path.clone(),
                line: hit.line,
                text: hit.text.clone(),
                reason: if gap_kind == "parser_call_extraction" || gap_kind == "parser_failure"
                {
                    "no graph edge extracted"
                } else {
                    "text mention outside graph-call evidence"
                }
                .to_string(),
                likely_gap: gap_kind.to_string(),
            };
            // Parser gaps are listed separately and also stay in `text_only_hits`.
            if is_likely_parser_gap_kind(gap_kind) {
                likely_parser_gaps.push(text_only_hit.clone());
            }
            text_only_hits.push(text_only_hit);
        }
    }
    let mut graph_only_edges = Vec::new();
    let mut likely_false_positives = Vec::new();
    for edge in &graph_edges {
        let Some(callsite) = &edge.callsite else {
            continue;
        };
        if text_by_location.contains_key(&(callsite.path.clone(), callsite.line)) {
            continue;
        }
        let current_line = self.current_line_text(&callsite.path, callsite.line)?;
        let graph_only = crate::query::graph::GraphOnlyEdge {
            path: callsite.path.clone(),
            line: callsite.line,
            target: edge.target.clone(),
            edge_kind: edge.edge_kind.clone(),
            confidence: edge.confidence.clone(),
            resolution: edge.resolution.clone(),
            evidence: edge.evidence.clone(),
            reason: "graph edge exists but pattern did not match text".to_string(),
            likely_reason: graph_only_reason(edge, current_line.as_deref()),
        };
        if is_likely_false_positive_graph_only(edge, &graph_only) {
            likely_false_positives.push(graph_only.clone());
        }
        graph_only_edges.push(graph_only);
    }
    let complete = likely_parser_gaps.is_empty() && likely_false_positives.is_empty();
    let recommended_fallback =
        recommended_graph_text_fallback(&likely_parser_gaps, &graph_only_edges);
    let pattern_match_mode = compare_pattern_match_mode(pattern, &symbol.name);
    let mut warnings = Vec::new();
    if pattern_match_mode == "substring_identifier" {
        warnings.push(format!(
            "pattern may match identifiers that merely contain `{}`; use an identifier \
             boundary or escaped call suffix for exact text auditing",
            symbol.name
        ));
    }
    Ok(crate::query::graph::CompareGraphTextReport {
        query: crate::query::graph::CompareGraphTextQuery {
            symbol_id: Some(symbol.symbol_id),
            logical_symbol_id: options.logical_symbol_id,
            symbol_path: symbol.qualified_name.clone(),
            pattern: pattern.to_string(),
            resolution: options.resolution_mode.as_str().to_string(),
            include_tests,
        },
        logical_symbol,
        variants,
        summary: crate::query::graph::CompareGraphTextSummary {
            // `graph_hits` and `graph_edges` carry the same count.
            graph_hits: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
            graph_edges: u64::try_from(graph_edges.len()).unwrap_or(u64::MAX),
            text_hits: u64::try_from(text_hits.len()).unwrap_or(u64::MAX),
            matched: u64::try_from(matched_hits.len()).unwrap_or(u64::MAX),
            graph_only: u64::try_from(graph_only_edges.len()).unwrap_or(u64::MAX),
            text_only: u64::try_from(text_only_hits.len()).unwrap_or(u64::MAX),
            // Parser gaps are a subset of text-only hits, so this cannot underflow.
            text_mentions: u64::try_from(text_only_hits.len() - likely_parser_gaps.len())
                .unwrap_or(u64::MAX),
            likely_parser_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
            likely_false_positives: u64::try_from(likely_false_positives.len())
                .unwrap_or(u64::MAX),
            likely_index_gaps: u64::try_from(likely_parser_gaps.len()).unwrap_or(u64::MAX),
            complete,
            recommended_fallback,
            pattern_match_mode,
            warnings,
        },
        coverage: self.graph_coverage(paths)?,
        matched_hits,
        text_only_hits,
        graph_only_edges,
        likely_parser_gaps,
        likely_false_positives,
    })
}
/// Loads the logical symbol with id `logical_symbol_id` together with its variants.
///
/// Returns `(None, [])` when no id is given or when the id is not found. Otherwise the
/// logical symbol is returned with one `LogicalSymbolVariant` per member reported by
/// `crate::query::symbol::logical_members`.
fn graph_logical_symbol(
    &self,
    logical_symbol_id: Option<i64>,
) -> anyhow::Result<(
    Option<crate::query::graph::LogicalSymbol>,
    Vec<crate::query::graph::LogicalSymbolVariant>,
)> {
    let conn = self.storage.connection();
    let looked_up = match logical_symbol_id {
        Some(id) => crate::query::symbol::lookup_logical_by_id(conn, id)?,
        None => None,
    };
    let logical = match looked_up {
        Some(logical) => logical,
        None => return Ok((None, Vec::new())),
    };

    let members = crate::query::symbol::logical_members(conn, logical.logical_symbol_id)?;
    let mut variants = Vec::new();
    for member in members {
        variants.push(crate::query::graph::LogicalSymbolVariant {
            symbol_id: member.symbol_id,
            cfg_expr: member.cfg_expr,
            signature_hash: member.signature_hash,
            start_line: member.start_line,
            end_line: member.end_line,
        });
    }

    let logical_symbol = crate::query::graph::LogicalSymbol {
        logical_symbol_id: logical.logical_symbol_id,
        qualified_name: logical.qualified_name,
        variant_count: logical.variant_count,
        group_reason: logical.group_reason,
    };
    Ok((Some(logical_symbol), variants))
}
/// Returns a copy of `options` with `logical_symbol_id` filled in where possible.
///
/// The id is only derived when the caller did not set one and `options.symbol_id` is
/// present and maps to a logical symbol via `crate::query::symbol::logical_for_symbol_id`.
/// In every other case the options are returned as an unchanged clone.
fn graph_options_with_logical_group(
    &self,
    options: &crate::query::graph::GraphTraversalOptions,
) -> anyhow::Result<crate::query::graph::GraphTraversalOptions> {
    let mut resolved = options.clone();
    if resolved.logical_symbol_id.is_none() {
        if let Some(symbol_id) = options.symbol_id {
            let conn = self.storage.connection();
            if let Some(logical) = crate::query::symbol::logical_for_symbol_id(conn, symbol_id)? {
                resolved.logical_symbol_id = Some(logical.logical_symbol_id);
            }
        }
    }
    Ok(resolved)
}
/// Returns chunks from the symbol's own file that relate to `symbol`.
///
/// A chunk qualifies when its `symbol_path` equals the symbol's qualified name, when its
/// `symbol_path` contains the symbol name, or when its text contains the symbol name.
/// Results are ordered by that priority and then by start line, and capped at
/// `limit.max(1)` rows. Every hit gets a fixed score of `1.0`, retrieval mode `"lexical"`
/// and a summary produced by `bounded_summary`.
///
/// The symbol name is matched literally: the LIKE wildcards `%` and `_` (the latter being
/// common in identifiers) and the escape character itself are escaped before the pattern is
/// built, so `read_chunk` no longer also matches e.g. `readXchunk`.
fn local_symbol_context_hits(
    &self,
    symbol: &crate::query::symbol::SymbolHit,
    limit: u32,
) -> anyhow::Result<Vec<SearchHit>> {
    let mut stmt = self.storage.connection().prepare(
        "
SELECT chunks.id, files.path, files.language, files.kind,
chunks.start_line, chunks.end_line, chunks.symbol_path, chunks.text
FROM chunks
JOIN files ON files.id = chunks.file_id
WHERE files.path = ?1
AND (
chunks.symbol_path = ?2
OR chunks.symbol_path LIKE ?3 ESCAPE '\\'
OR chunks.text LIKE ?4 ESCAPE '\\'
)
ORDER BY
CASE
WHEN chunks.symbol_path = ?2 THEN 0
WHEN chunks.symbol_path LIKE ?3 ESCAPE '\\' THEN 1
ELSE 2
END,
chunks.start_line
LIMIT ?5
",
    )?;
    // Escape LIKE metacharacters so the name is treated as a plain substring.
    let mut escaped_name = String::with_capacity(symbol.name.len());
    for ch in symbol.name.chars() {
        if matches!(ch, '\\' | '%' | '_') {
            escaped_name.push('\\');
        }
        escaped_name.push(ch);
    }
    let contains_name = format!("%{escaped_name}%");
    let rows = stmt.query_map(
        params![
            symbol.path,
            symbol.qualified_name,
            contains_name,
            contains_name,
            i64::from(limit.max(1)),
        ],
        |row| {
            let text: String = row.get(7)?;
            Ok(SearchHit {
                chunk_id: row.get(0)?,
                path: row.get(1)?,
                language: row.get(2)?,
                kind: row.get(3)?,
                start_line: row.get(4)?,
                end_line: row.get(5)?,
                symbol_path: row.get(6)?,
                score: 1.0,
                retrieval_mode: "lexical".to_string(),
                summary: bounded_summary(&text),
                graph: None,
                score_components: None,
            })
        },
    )?;
    let mut hits = Vec::new();
    for row in rows {
        hits.push(row?);
    }
    Ok(hits)
}
/// Returns up to `limit` impact items for `query`.
/// Forwards to `crate::query::impact::impact_surface`.
pub fn impact_surface(
&self,
query: &str,
limit: u32,
) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
crate::query::impact::impact_surface(self.storage.connection(), query, limit)
}
/// Like `impact_surface`, but with an explicit graph `resolution_mode`.
/// Forwards to `crate::query::impact::impact_surface_with_options`.
pub fn impact_surface_with_options(
&self,
query: &str,
limit: u32,
resolution_mode: crate::query::graph::GraphResolutionMode,
) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
crate::query::impact::impact_surface_with_options(
self.storage.connection(),
query,
limit,
resolution_mode,
)
}
/// Returns up to `limit` impact items for an already-resolved `symbol`.
/// Forwards to `crate::query::impact::impact_surface_for_symbol`.
pub fn impact_surface_for_selected_symbol(
&self,
symbol: &crate::query::symbol::SymbolHit,
limit: u32,
resolution_mode: crate::query::graph::GraphResolutionMode,
) -> anyhow::Result<Vec<crate::query::impact::ImpactItem>> {
crate::query::impact::impact_surface_for_symbol(
self.storage.connection(),
symbol,
limit,
resolution_mode,
)
}
/// Builds an impact-surface report for an already-resolved `symbol`.
/// Forwards to `crate::query::impact::impact_surface_report_for_symbol`.
pub fn impact_surface_report_for_selected_symbol(
&self,
symbol: &crate::query::symbol::SymbolHit,
limit: u32,
options: &crate::query::impact::ImpactSurfaceOptions,
) -> anyhow::Result<crate::query::impact::ImpactSurfaceReport> {
crate::query::impact::impact_surface_report_for_symbol(
self.storage.connection(),
symbol,
limit,
options,
)
}
/// Builds a repository brief via `crate::query::repo_brief::repo_brief`.
pub fn repo_brief(
&self,
options: crate::query::repo_brief::RepoBriefOptions,
) -> anyhow::Result<crate::query::repo_brief::RepoBrief> {
crate::query::repo_brief::repo_brief(self.storage.connection(), options)
}
/// Builds a repository clusters report via `crate::query::clusters::repo_clusters`.
pub fn repo_clusters(
&self,
options: crate::query::clusters::RepoClustersOptions,
) -> anyhow::Result<crate::query::clusters::RepoClustersReport> {
crate::query::clusters::repo_clusters(self.storage.connection(), options)
}
/// Creates a repo memory from `request` via `crate::query::memory::create_memory`.
pub fn memory_create(
&self,
request: crate::query::memory::RepoMemoryCreate,
) -> anyhow::Result<crate::query::memory::RepoMemoryCreateResult> {
crate::query::memory::create_memory(self.storage.connection(), request)
}
/// Applies `update` to a repo memory via `crate::query::memory::update_memory` and returns
/// the resulting memory.
pub fn memory_update(
&self,
update: crate::query::memory::RepoMemoryUpdate,
) -> anyhow::Result<crate::query::memory::RepoMemory> {
crate::query::memory::update_memory(self.storage.connection(), update)
}
/// Marks the memory `memory_id` obsolete via `crate::query::memory::mark_obsolete` and
/// returns the resulting memory.
pub fn memory_mark_obsolete(
&self,
memory_id: &str,
) -> anyhow::Result<crate::query::memory::RepoMemory> {
crate::query::memory::mark_obsolete(self.storage.connection(), memory_id)
}
/// Searches repo memories for `query`, returning at most `limit` results.
/// Forwards to `crate::query::memory::memory_search`.
pub fn memory_search(
&self,
query: &str,
limit: u32,
) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
crate::query::memory::memory_search(self.storage.connection(), query, limit)
}
/// Returns up to `limit` memories for `symbol`.
/// Forwards to `crate::query::memory::memories_for_symbol`.
pub fn memory_for_symbol(
&self,
symbol: &crate::query::symbol::SymbolHit,
limit: u32,
) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
crate::query::memory::memories_for_symbol(self.storage.connection(), symbol, limit)
}
/// Returns up to `limit` memories for `path`.
/// Forwards to `crate::query::memory::memories_for_path`.
pub fn memory_for_path(
&self,
path: &str,
limit: u32,
) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
crate::query::memory::memories_for_path(self.storage.connection(), path, limit)
}
/// Returns up to `limit` memories for the graph edges in `edge_ids`.
/// Forwards to `crate::query::memory::memories_for_edges`.
pub fn memory_for_edges(
&self,
edge_ids: &[i64],
limit: u32,
) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
crate::query::memory::memories_for_edges(self.storage.connection(), edge_ids, limit)
}
/// Collects memory evidence for `symbol` together with its caller and callee edges.
/// Forwards all arguments to `crate::query::memory::memory_evidence_for_symbol_and_edges`.
pub fn memory_evidence_for_symbol_and_edges(
&self,
symbol: &crate::query::symbol::SymbolHit,
caller_edge_ids: &[i64],
callee_edge_ids: &[i64],
limit: u32,
) -> anyhow::Result<crate::query::memory::RepoMemoryEvidence> {
crate::query::memory::memory_evidence_for_symbol_and_edges(
self.storage.connection(),
symbol,
caller_edge_ids,
callee_edge_ids,
limit,
)
}
/// Returns up to `limit` memories for the call path identified by `edge_sequence_hash`.
/// Forwards to `crate::query::memory::memories_for_call_path_hash`.
pub fn memory_for_call_path_hash(
&self,
edge_sequence_hash: &str,
limit: u32,
) -> anyhow::Result<Vec<crate::query::memory::RepoMemory>> {
crate::query::memory::memories_for_call_path_hash(
self.storage.connection(),
edge_sequence_hash,
limit,
)
}
/// Rebinds memory `memory_id` to the target `bind` via
/// `crate::query::memory::rebind_memory` and returns the resulting memory.
pub fn memory_rebind(
&self,
memory_id: &str,
bind: crate::query::memory::RepoMemoryBindTarget,
) -> anyhow::Result<crate::query::memory::RepoMemory> {
crate::query::memory::rebind_memory(self.storage.connection(), memory_id, bind)
}
/// Validates the stored memories via `crate::query::memory::validate_memories` and returns
/// the validation report.
pub fn memory_validate(
&self,
) -> anyhow::Result<crate::query::memory::RepoMemoryValidationReport> {
crate::query::memory::validate_memories(self.storage.connection())
}
/// Returns the entries produced by `crate::query::memory::doctor_report`.
pub fn memory_doctor(&self) -> anyhow::Result<Vec<crate::query::memory::MemoryDoctorEntry>> {
crate::query::memory::doctor_report(self.storage.connection())
}
/// Lists memory summaries, passing the optional `kind` through to
/// `crate::query::memory::list_memories`.
pub fn memory_list(
&self,
kind: Option<&str>,
) -> anyhow::Result<Vec<crate::query::memory::MemorySummary>> {
crate::query::memory::list_memories(self.storage.connection(), kind)
}
/// Fetches the memory `memory_id` via `crate::query::memory::memory_by_id`.
/// The `Option` in the return type allows for the id not being found.
pub fn memory_get(
&self,
memory_id: &str,
) -> anyhow::Result<Option<crate::query::memory::RepoMemory>> {
crate::query::memory::memory_by_id(self.storage.connection(), memory_id)
}
}