mod constants;
mod context_core;
mod context_service;
mod dto;
mod error;
mod exact_shard;
mod git;
mod hash;
mod indexing;
mod paths;
mod read_core;
mod registry;
mod search_core;
mod search_service;
mod snapshot;
mod store;
mod validation;
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use self::context_core::bounded_chunk_range;
use self::search_core::search_results_from_snapshot;
use crate::{Chunk, SourceIndex};
use validation::{
normalize_requested_path, validate_context_budget, validate_git_ref, validate_repo_name,
validate_top_k,
};
pub(crate) use constants::*;
pub use dto::*;
pub use error::AsrError;
pub use exact_shard::ExactShardSummary;
pub use paths::AsrPaths;
pub use store::{IndexStateRecord, RepoRecord};
use store::{INDEX_STATUS_FAILED, INDEX_STATUS_READY};
pub type AsrResult<T> = Result<T, AsrError>;
/// Public entry point for the `init` operation.
///
/// Forwards to `registry::init` and returns its result unchanged.
pub fn init() -> AsrResult<InitOutput> {
registry::init()
}
/// Public entry point for adding a repository.
///
/// Forwards `name` and `path` unchanged to `registry::repo_add` and returns
/// its result.
pub fn repo_add(name: &str, path: &Path) -> AsrResult<RepoAddOutput> {
registry::repo_add(name, path)
}
/// Public entry point for listing repositories.
///
/// Forwards to `registry::repo_list` and returns its result unchanged.
pub fn repo_list() -> AsrResult<RepoListOutput> {
registry::repo_list()
}
/// Public entry point for querying the status of the repository `name`.
///
/// Forwards to `registry::repo_status` and returns its result unchanged.
pub fn repo_status(name: &str) -> AsrResult<RepoStatusOutput> {
registry::repo_status(name)
}
/// Public entry point for indexing the repository `name`.
///
/// Forwards to `indexing::repo_index` and returns its result unchanged.
pub fn repo_index(name: &str) -> AsrResult<RepoIndexOutput> {
indexing::repo_index(name)
}
/// Public entry point for the "index all" operation.
///
/// Forwards to `indexing::repo_index_all` and returns its result unchanged.
pub fn repo_index_all() -> AsrResult<RepoIndexAllOutput> {
indexing::repo_index_all()
}
/// Public entry point for search.
///
/// Forwards `query`, `repo_name`, and `top_k` unchanged to
/// `search_service::search` and returns its result.
pub fn search(query: &str, repo_name: &str, top_k: usize) -> AsrResult<SearchOutput> {
search_service::search(query, repo_name, top_k)
}
/// Public entry point for context retrieval.
///
/// Forwards `query`, `repo_name`, and `budget` unchanged to
/// `context_service::context` and returns its result.
pub fn context(query: &str, repo_name: &str, budget: usize) -> AsrResult<ContextOutput> {
context_service::context(query, repo_name, budget)
}
/// Public entry point for verifying the index of `repo_name`.
///
/// Forwards to `verify_impl` and returns its result unchanged.
pub fn verify(repo_name: &str) -> AsrResult<VerifyOutput> {
verify_impl(repo_name)
}
/// Runs the full list of verification checks against the ready index snapshot
/// of `repo_name` and reports them as a `VerifyOutput`.
///
/// # Errors
///
/// Returns early (before any check is recorded) when the repository name is
/// invalid, the store cannot be opened, or no ready snapshot can be loaded.
/// The three search-probe checks and the determinism check also propagate
/// search errors with `?` instead of recording a failed check.
///
/// The overall `ok` flag is true only when every recorded check has the status
/// string `"passed"`.
fn verify_impl(repo_name: &str) -> AsrResult<VerifyOutput> {
validate_repo_name(repo_name)?;
let (paths, store) = open_store()?;
let snapshot = require_ready_index_snapshot(&paths, &store, repo_name)?;
let mut checks = Vec::new();
// Reaching this point means `require_ready_index_snapshot` succeeded, so this
// check is recorded as passed unconditionally.
push_check(
&mut checks,
"registered_repo",
true,
"repository is registered and resolves to a local Git root",
);
push_check(
&mut checks,
"index_state_ready",
snapshot.state.status == INDEX_STATUS_READY,
"index_state is ready",
);
// Recompute the hash over the snapshot chunks and compare it with the hash
// recorded in the index state.
let snapshot_content_hash = content_hash_for_chunks(&snapshot.chunks);
push_check(
&mut checks,
"chunk_snapshot_hash",
snapshot.state.content_hash.as_deref() == Some(snapshot_content_hash.as_str()),
"SQLite chunk snapshot hash matches index_state.content_hash",
);
// The exact shard must be ready and carry the same content hash as the index
// state (a missing state hash is compared as the default value).
push_check(
&mut checks,
"persistent_exact_shard",
snapshot.exact_shard.summary.status == INDEX_STATUS_READY
&& snapshot.exact_shard.summary.content_hash
== snapshot.state.content_hash.clone().unwrap_or_default(),
"persistent exact shard is present, hash-validated, and ready",
);
// Informational check: always recorded as passed.
push_check(
&mut checks,
"exact_shard_rebuildable_cache",
true,
"SQLite chunk snapshot is the source of truth; the persistent exact shard is a rebuildable cache validated against that snapshot",
);
// Any indexed chunk whose path looks generated/build/cache fails this check;
// the offending paths are listed in the message.
let generated_leaks = generated_or_build_paths(&snapshot.chunks);
push_check(
&mut checks,
"generated_exclusion",
generated_leaks.is_empty(),
if generated_leaks.is_empty() {
"default generated/build/cache exclusions are absent from indexed chunks".to_string()
} else {
format!(
"generated/build/cache paths leaked into index: {}",
generated_leaks.join(", ")
)
},
);
// Search-quality probes derived from the indexed chunks themselves.
let exact_symbol_check = verify_exact_symbol_top3(&snapshot)?;
push_check(
&mut checks,
"exact_symbol_top3",
exact_symbol_check.0,
exact_symbol_check.1,
);
let filename_check = verify_filename_top3(&snapshot)?;
push_check(
&mut checks,
"filename_top3",
filename_check.0,
filename_check.1,
);
let domain_check = verify_domain_query_top5(&snapshot)?;
push_check(
&mut checks,
"domain_query_top5",
domain_check.0,
domain_check.1,
);
push_check(
&mut checks,
"deterministic_search_order",
verify_deterministic_probe_search(&snapshot)?,
"probe search returns a stable path/start/end ordering",
);
push_check(
&mut checks,
"context_budget_policy",
verify_budget_policy(&snapshot),
"at least one indexed chunk can be packed under the default context budget",
);
// A graph build failure is reported as a failed check rather than an error.
let deps_impact_check = verify_deps_impact_hints(&snapshot, &paths);
push_check(
&mut checks,
"deps_impact_local_hints",
deps_impact_check.0,
deps_impact_check.1,
);
// The remaining two checks only assert that the configured limits are non-zero.
push_check(
&mut checks,
"read_range_policy",
MAX_READ_LINES > 0 && MAX_READ_FILE_BYTES > 0,
format!(
"read is bounded to {MAX_READ_LINES} lines and {MAX_READ_FILE_BYTES} bytes per file"
),
);
push_check(
&mut checks,
"diff_hunk_policy",
MAX_DIFF_HUNKS > 0,
format!("diff output is bounded to {MAX_DIFF_HUNKS} hunks and does not expose full files"),
);
let ok = checks.iter().all(|check| check.status == "passed");
// Clone the summary before `snapshot.state` and `snapshot.repo.name` are moved
// out of the snapshot below.
let exact_shard = snapshot.exact_shard.summary.clone();
Ok(VerifyOutput {
repo: snapshot.repo.name,
ok,
index_state: search_index_state(snapshot.state, false, exact_shard),
checks,
})
}
/// Reads a line range from a file of `repo_name` with `live` set to `false`.
///
/// Forwards to `read_with_policy`.
pub fn read(repo_name: &str, path: &str, line_range: &str) -> AsrResult<ReadOutput> {
read_with_policy(repo_name, path, line_range, false)
}
/// Reads a line range from a file of `repo_name` with `live` set to `true`.
///
/// Forwards to `read_with_policy`.
pub fn read_live(repo_name: &str, path: &str, line_range: &str) -> AsrResult<ReadOutput> {
read_with_policy(repo_name, path, line_range, true)
}
pub(crate) fn read_with_policy(
repo_name: &str,
path: &str,
line_range: &str,
live: bool,
) -> AsrResult<ReadOutput> {
validate_repo_name(repo_name)?;
let (paths, store) = open_store()?;
let (repo, source_policy) = source_repo_for_file_command(&paths, &store, repo_name, live)?;
let git_root = checked_git_root(&repo)?;
let (relative_path, source) = read_registered_source(&repo, &git_root, path)?;
let requested_range = read_core::parse_line_range(line_range).map_err(read_line_range_error)?;
let total_lines = source.lines().count();
let read_plan = read_core::plan_read(requested_range, total_lines, MAX_READ_LINES)
.map_err(|err| read_plan_error(&relative_path, err))?;
let lines = read_core::collect_read_lines(&source, read_plan)
.into_iter()
.map(|line| ReadLine {
line: line.line,
content: line.content,
})
.collect();
Ok(ReadOutput {
repo: repo.name,
path: relative_path,
source_policy,
start_line: read_plan.start_line(),
end_line: read_plan.end_line(),
requested_start_line: read_plan.requested_start_line(),
requested_end_line: read_plan.requested_end_line(),
total_lines: read_plan.total_lines(),
source_hash: hash::hash_text(&source),
lines,
})
}
/// Converts a line-range parse failure into an `invalid_line_range` error with
/// a message specific to the variant.
fn read_line_range_error(error: read_core::LineRangeParseError) -> AsrError {
    // Every variant shares the same error code; only the message differs.
    let message = match error {
        read_core::LineRangeParseError::MissingSeparator => {
            "Line range must use <start:end> format"
        }
        read_core::LineRangeParseError::InvalidStart => {
            "Line range start must be a positive integer"
        }
        read_core::LineRangeParseError::InvalidEnd => "Line range end must be a positive integer",
        read_core::LineRangeParseError::NonPositiveOrReversed => {
            "Line range must be positive and start must be <= end"
        }
    };
    AsrError::new("invalid_line_range", message)
}
/// Converts a read-plan failure into an `invalid_line_range` error that
/// carries `path`.
fn read_plan_error(path: &str, error: read_core::ReadPlanError) -> AsrError {
    // Both variants share the code and path; only the message differs.
    let message = match error {
        read_core::ReadPlanError::RangeTooLarge { max_lines } => {
            format!("Read range may not exceed {max_lines} lines")
        }
        read_core::ReadPlanError::RangeOutsideFile {
            requested_start_line,
            requested_end_line,
            total_lines,
        } => format!(
            "Line range {requested_start_line}:{requested_end_line} is outside file with {total_lines} lines"
        ),
    };
    AsrError::with_path("invalid_line_range", message, path.to_string())
}
/// Public entry point for diffing `base` against `head` in `repo_name`.
///
/// Forwards to `diff_impl` and returns its result unchanged.
pub fn diff(repo_name: &str, base: &str, head: &str) -> AsrResult<DiffOutput> {
diff_impl(repo_name, base, head)
}
/// Produces hunk-level diff metadata between two Git refs of a registered
/// repository.
///
/// Hunks come from `git::diff_hunks`. The output is sorted by path, then new
/// start line, then old start line. `changed_files` is the sorted,
/// de-duplicated set of hunk paths.
///
/// # Errors
///
/// Returns `diff_too_large` when more than `MAX_DIFF_HUNKS` hunks are
/// produced, and propagates validation, store, repository, and Git errors.
fn diff_impl(repo_name: &str, base: &str, head: &str) -> AsrResult<DiffOutput> {
    validate_repo_name(repo_name)?;
    validate_git_ref(base, "base")?;
    validate_git_ref(head, "head")?;
    let (_paths, store) = open_store()?;
    let repo = require_repo(&store, repo_name)?;
    let git_root = checked_git_root(&repo)?;

    let raw_hunks = git::diff_hunks(&git_root, base, head)?;
    if raw_hunks.len() > MAX_DIFF_HUNKS {
        return Err(AsrError::new(
            "diff_too_large",
            format!("Diff contains more than {MAX_DIFF_HUNKS} hunks"),
        ));
    }

    let mut hunks = raw_hunks
        .into_iter()
        .map(|hunk| {
            // Inclusive end line; a zero-length side collapses onto its start.
            let old_end = hunk
                .old_start
                .saturating_add(hunk.old_lines.saturating_sub(1));
            let new_end = hunk
                .new_start
                .saturating_add(hunk.new_lines.saturating_sub(1));
            let summary = format!(
                "{} added, {} removed, {} context",
                hunk.added_lines, hunk.removed_lines, hunk.context_lines
            );
            DiffHunkOutput {
                path: hunk.path,
                old_start: hunk.old_start,
                old_lines: hunk.old_lines,
                old_end,
                new_start: hunk.new_start,
                new_lines: hunk.new_lines,
                new_end,
                added_lines: hunk.added_lines,
                removed_lines: hunk.removed_lines,
                context_lines: hunk.context_lines,
                summary,
                section: hunk.section,
            }
        })
        .collect::<Vec<_>>();

    // The ordered set gives sorted, unique file paths.
    let changed_files = hunks
        .iter()
        .map(|hunk| hunk.path.clone())
        .collect::<BTreeSet<_>>();

    hunks.sort_by(|left, right| {
        (&left.path, left.new_start, left.old_start).cmp(&(
            &right.path,
            right.new_start,
            right.old_start,
        ))
    });

    Ok(DiffOutput {
        repo: repo.name,
        base: base.to_string(),
        head: head.to_string(),
        source_policy: git_ref_source_policy(),
        hunk_count: hunks.len(),
        changed_files: changed_files.into_iter().collect(),
        hunks,
    })
}
/// Reports dependency hints for `path` in `repo_name` with `live` set to
/// `false`.
///
/// Forwards to `deps_with_policy`.
pub fn deps(repo_name: &str, path: &str) -> AsrResult<DepsOutput> {
deps_with_policy(repo_name, path, false)
}
/// Reports dependency hints for `path` in `repo_name` with `live` set to
/// `true`.
///
/// Forwards to `deps_with_policy`.
pub fn deps_live(repo_name: &str, path: &str) -> AsrResult<DepsOutput> {
deps_with_policy(repo_name, path, true)
}
pub(crate) fn deps_with_policy(repo_name: &str, path: &str, live: bool) -> AsrResult<DepsOutput> {
validate_repo_name(repo_name)?;
let (paths, store) = open_store()?;
let (repo, source_policy) = source_repo_for_file_command(&paths, &store, repo_name, live)?;
let git_root = checked_git_root(&repo)?;
let (requested_path, _source) = read_registered_source(&repo, &git_root, path)?;
let graph = build_registered_graph(&paths, &repo)?;
let lookup_path = graph
.resolve_file_path(&requested_path)
.unwrap_or_else(|| requested_path.clone());
let node = graph.deps(&lookup_path).ok_or_else(|| {
AsrError::with_path(
"file_not_found",
"File is not present in ASR local import graph",
requested_path.clone(),
)
})?;
let mut symbols = node
.symbols
.iter()
.map(|symbol| GraphSymbolOutput {
name: symbol.name.clone(),
kind: symbol.kind.clone(),
line: symbol.line,
})
.collect::<Vec<_>>();
symbols.sort_by(|left, right| {
left.line
.cmp(&right.line)
.then_with(|| left.kind.cmp(&right.kind))
.then_with(|| left.name.cmp(&right.name))
});
let mut raw_imports = node.raw_imports.clone();
raw_imports.sort();
raw_imports.dedup();
let mut depends_on = node.depends_on.clone();
depends_on.sort();
depends_on.dedup();
let mut dependents = graph
.dependents(&lookup_path)
.into_iter()
.map(|path| path.to_string())
.collect::<Vec<_>>();
dependents.sort();
dependents.dedup();
Ok(DepsOutput {
repo: repo.name,
path: lookup_path,
source_policy,
analysis_level: "local_import_hints".to_string(),
symbols,
raw_imports,
depends_on,
dependents,
graph_files: graph.file_count(),
graph_edges: graph.edge_count(),
})
}
/// Reports impact hints for `path` in `repo_name` with `live` set to `false`.
///
/// Forwards to `impact_with_policy`.
pub fn impact(repo_name: &str, path: &str) -> AsrResult<ImpactOutput> {
impact_with_policy(repo_name, path, false)
}
/// Reports impact hints for `path` in `repo_name` with `live` set to `true`.
///
/// Forwards to `impact_with_policy`.
pub fn impact_live(repo_name: &str, path: &str) -> AsrResult<ImpactOutput> {
impact_with_policy(repo_name, path, true)
}
pub(crate) fn impact_with_policy(
repo_name: &str,
path: &str,
live: bool,
) -> AsrResult<ImpactOutput> {
validate_repo_name(repo_name)?;
let (paths, store) = open_store()?;
let (repo, source_policy) = source_repo_for_file_command(&paths, &store, repo_name, live)?;
let git_root = checked_git_root(&repo)?;
let (requested_path, _source) = read_registered_source(&repo, &git_root, path)?;
let graph = build_registered_graph(&paths, &repo)?;
let lookup_path = graph
.resolve_file_path(&requested_path)
.unwrap_or_else(|| requested_path.clone());
if graph.deps(&lookup_path).is_none() {
return Err(AsrError::with_path(
"file_not_found",
"File is not present in ASR local import graph",
requested_path,
));
}
let mut direct_dependents = graph
.dependents(&lookup_path)
.into_iter()
.map(|path| path.to_string())
.collect::<Vec<_>>();
direct_dependents.sort();
direct_dependents.dedup();
let mut affected = graph.impact(&lookup_path);
affected.sort();
affected.dedup();
Ok(ImpactOutput {
repo: repo.name,
path: lookup_path,
source_policy,
analysis_level: "local_import_hints".to_string(),
affected_count: affected.len(),
affected,
direct_dependents,
graph_files: graph.file_count(),
graph_edges: graph.edge_count(),
})
}
/// Resolves the ASR paths, ensures their directories exist, and opens the
/// store at `db_path` with its schema initialized.
///
/// Returns the resolved paths together with the opened store. Any failure in
/// these steps is propagated.
pub(crate) fn open_store() -> AsrResult<(AsrPaths, store::Store)> {
    let resolved = AsrPaths::resolve()?;
    resolved.ensure_dirs()?;

    let database = store::Store::open(&resolved.db_path)?;
    database.init_schema()?;

    Ok((resolved, database))
}
/// Looks up the repository record registered under `name`.
///
/// # Errors
///
/// Propagates name-validation and store errors, and returns `repo_not_found`
/// when the store has no record for `name`.
pub(crate) fn require_repo(store: &store::Store, name: &str) -> AsrResult<RepoRecord> {
    validate_repo_name(name)?;
    match store.get_repo(name)? {
        Some(record) => Ok(record),
        None => Err(AsrError::new(
            "repo_not_found",
            format!("Repository is not registered: {name}"),
        )),
    }
}
/// Maps a Git repository kind to the source-type string stored for it.
pub(crate) fn source_type_for_kind(kind: git::RepoKind) -> &'static str {
    match kind {
        git::RepoKind::Bare => SOURCE_TYPE_LOCAL_BARE,
        git::RepoKind::Worktree => SOURCE_TYPE_LOCAL,
    }
}
/// Returns `true` when the record's `source_type` equals
/// `SOURCE_TYPE_LOCAL_BARE`.
pub(crate) fn repo_is_bare(repo: &RepoRecord) -> bool {
repo.source_type == SOURCE_TYPE_LOCAL_BARE
}
/// Ensures that the ASR home directory and a repository root do not contain
/// one another.
///
/// Both paths are canonicalized before comparison.
///
/// # Errors
///
/// - `asr_home_unreadable` / `repo_unreadable` when canonicalization fails.
/// - `asr_home_inside_repo` when the home is at or below the repository root.
/// - `repo_inside_asr_home` when the repository root is at or below the home.
pub(crate) fn validate_asr_home_outside_repo(paths: &AsrPaths, git_root: &Path) -> AsrResult<()> {
    let home = match paths.home.canonicalize() {
        Ok(resolved) => resolved,
        Err(err) => {
            return Err(AsrError::with_path(
                "asr_home_unreadable",
                format!("ASR_HOME is unreadable: {err}"),
                path_string(&paths.home),
            ))
        }
    };
    let repo_root = match git_root.canonicalize() {
        Ok(resolved) => resolved,
        Err(err) => {
            return Err(AsrError::with_path(
                "repo_unreadable",
                format!("Registered repository root is unreadable: {err}"),
                path_string(git_root),
            ))
        }
    };

    // The home-inside-repo case is tested first, so identical paths report
    // `asr_home_inside_repo`.
    let violation = if home.starts_with(&repo_root) {
        Some((
            "asr_home_inside_repo",
            "ASR_HOME must not be located inside a registered source repository",
            &home,
        ))
    } else if repo_root.starts_with(&home) {
        Some((
            "repo_inside_asr_home",
            "Registered source repositories must not live inside ASR_HOME",
            &repo_root,
        ))
    } else {
        None
    };

    match violation {
        Some((code, message, location)) => {
            Err(AsrError::with_path(code, message, path_string(location)))
        }
        None => Ok(()),
    }
}
/// Returns the canonical Git root of a registered repository after checking
/// that the stored path still resolves to that same root.
///
/// # Errors
///
/// - `repo_unreadable` when the stored path does not exist or cannot be
///   canonicalized.
/// - `repo_root_mismatch` when Git reports a root different from the
///   canonicalized stored path.
/// - Any error from the Git root lookup itself.
pub(crate) fn checked_git_root(repo: &RepoRecord) -> AsrResult<PathBuf> {
    let registered = PathBuf::from(&repo.git_root);
    if !registered.exists() {
        return Err(AsrError::with_path(
            "repo_unreadable",
            "Registered repository path does not exist",
            repo.git_root.clone(),
        ));
    }

    let stored = match registered.canonicalize() {
        Ok(resolved) => resolved,
        Err(err) => {
            return Err(AsrError::with_path(
                "repo_unreadable",
                format!("Registered repository path is unreadable: {err}"),
                repo.git_root.clone(),
            ))
        }
    };

    // Bare repositories and worktrees use different root lookups.
    let lookup = if repo_is_bare(repo) {
        git::canonical_bare_root(&stored)
    } else {
        git::canonical_git_root(&stored)
    };
    let canonical = lookup?;

    if canonical == stored {
        Ok(canonical)
    } else {
        Err(AsrError::with_path(
            "repo_root_mismatch",
            "Registered repository path no longer resolves to its stored Git root",
            repo.git_root.clone(),
        ))
    }
}
/// The data loaded by `require_ready_index_snapshot` for a repository whose
/// index passed the readiness, staleness, and hash checks.
pub(crate) struct ReadyIndexSnapshot {
// The registered repository record.
pub(crate) repo: RepoRecord,
// The index state read from the store.
pub(crate) state: IndexStateRecord,
// The chunks taken from the exact shard snapshot (a clone of `exact_shard.chunks`).
pub(crate) chunks: Vec<Chunk>,
// The exact shard snapshot returned by `exact_shard::ensure_ready`.
pub(crate) exact_shard: exact_shard::ExactShardSnapshot,
}
/// Loads the index snapshot for `repo_name` and rejects it unless it is
/// ready, current, non-empty, and hash-consistent.
///
/// # Errors
///
/// - `repo_not_indexed` when no index state is stored.
/// - `repo_index_not_ready` when the state's status is not ready.
/// - `repo_index_stale` when the HEAD commit, any dirty/modified/untracked
///   flag, or the worktree fingerprint differs from the stored state.
/// - `repo_index_empty` when the store holds no chunks.
/// - `repo_index_corrupt` when the recomputed chunk hash differs from the
///   stored one.
/// - Any error from repository lookup, Git, the store, or the exact shard.
fn require_ready_index_snapshot(
    paths: &AsrPaths,
    store: &store::Store,
    repo_name: &str,
) -> AsrResult<ReadyIndexSnapshot> {
    let repo = require_repo(store, repo_name)?;

    let Some(state) = store.get_index_state(repo_name)? else {
        return Err(AsrError::new(
            "repo_not_indexed",
            format!("Repository must be indexed before search/context: {repo_name}"),
        ));
    };
    if state.status != INDEX_STATUS_READY {
        return Err(AsrError::new(
            "repo_index_not_ready",
            format!("Repository index is not ready: {repo_name}"),
        ));
    }

    let git_root = checked_git_root(&repo)?;
    let current_status = git::repository_status(&git_root, repo_is_bare(&repo))?;
    let current_head = git::head_commit(&git_root);
    // The index is current only if every recorded attribute still matches.
    let up_to_date = state.head_commit == current_head
        && state.dirty == current_status.dirty
        && state.modified == current_status.modified
        && state.untracked == current_status.untracked
        && state.worktree_fingerprint == current_status.worktree_fingerprint;
    if !up_to_date {
        return Err(AsrError::new(
            "repo_index_stale",
            format!("Repository index is stale; run `asr repo index {repo_name} --json`"),
        ));
    }

    let stored_chunks = store.list_chunks(repo_name)?;
    if stored_chunks.is_empty() {
        return Err(AsrError::new(
            "repo_index_empty",
            format!("Repository index contains no chunks: {repo_name}"),
        ));
    }

    let recomputed_hash = content_hash_for_chunks(&stored_chunks);
    let hash_matches = state.content_hash.as_deref() == Some(recomputed_hash.as_str());
    if !hash_matches {
        return Err(AsrError::new(
            "repo_index_corrupt",
            format!("Repository chunk snapshot hash mismatch: {repo_name}"),
        ));
    }

    let exact_shard =
        exact_shard::ensure_ready(&paths.exact_index, &repo, &state, &stored_chunks)?;
    // The snapshot exposes the shard's own chunk list, not the stored one.
    let chunks = exact_shard.chunks.clone();

    Ok(ReadyIndexSnapshot {
        repo,
        state,
        chunks,
        exact_shard,
    })
}
pub(crate) fn index_registered_repo(
paths: &AsrPaths,
store: &store::Store,
repo: &RepoRecord,
) -> AsrResult<RepoIndexOutput> {
let git_root = checked_git_root(repo)?;
let branch = git::current_branch(&git_root);
let head_commit = git::head_commit(&git_root);
let worktree = git::repository_status(&git_root, repo_is_bare(repo))?;
let indexed_at = now_rfc3339();
if head_commit.is_none() {
let failed = failed_index_state(
repo,
head_commit.clone(),
&worktree,
indexed_at.clone(),
"Git repository has no HEAD commit",
);
if let Err(store_err) = store.put_index_state(&failed) {
log::warn!(
"Failed to record index failure for {}: {} ({})",
repo.name,
store_err.message,
store_err.code
);
}
return Err(AsrError::with_path(
"repo_head_missing",
"Git repository has no HEAD commit",
repo.git_root.clone(),
));
}
let index = match build_search_index(paths, repo) {
Ok(index) => index,
Err(err) => {
let failed = failed_index_state(
repo,
head_commit.clone(),
&worktree,
indexed_at,
err.message.clone(),
);
if let Err(store_err) = store.put_index_state(&failed) {
log::warn!(
"Failed to record index failure for {}: {} ({})",
repo.name,
store_err.message,
store_err.code
);
}
return Err(err);
}
};
let final_head_commit = git::head_commit(&git_root);
let final_worktree = git::repository_status(&git_root, repo_is_bare(repo))?;
if final_head_commit != head_commit
|| final_worktree.dirty != worktree.dirty
|| final_worktree.modified != worktree.modified
|| final_worktree.untracked != worktree.untracked
|| final_worktree.worktree_fingerprint != worktree.worktree_fingerprint
{
let failed = failed_index_state(
repo,
final_head_commit,
&final_worktree,
indexed_at.clone(),
"Repository changed while ASR was indexing it; retry after the worktree is stable",
);
if let Err(store_err) = store.put_index_state(&failed) {
log::warn!(
"Failed to record index failure for {}: {} ({})",
repo.name,
store_err.message,
store_err.code
);
}
return Err(AsrError::with_path(
"repo_changed_during_index",
"Repository changed while ASR was indexing it; retry after the worktree is stable",
repo.git_root.clone(),
));
}
let stats = index.stats();
let languages = stable_languages(stats.languages);
let chunks = index.chunks().to_vec();
let content_hash = content_hash_for_chunks(&chunks);
let state = IndexStateRecord {
repo_name: repo.name.clone(),
status: INDEX_STATUS_READY.to_string(),
head_commit: head_commit.clone(),
dirty: worktree.dirty,
untracked: worktree.untracked,
modified: worktree.modified,
worktree_fingerprint: worktree.worktree_fingerprint.clone(),
indexed_files: stats.indexed_files,
total_chunks: stats.total_chunks,
languages: languages.clone(),
content_hash: Some(content_hash.clone()),
error: None,
indexed_at: indexed_at.clone(),
};
if let Err(err) = store.replace_index(&repo.name, &state, &chunks) {
record_index_failure(
store,
repo,
head_commit.clone(),
&worktree,
indexed_at.clone(),
&err.message,
);
return Err(err);
}
if let Err(err) = store.update_repo_head(
&repo.name,
branch.as_deref(),
head_commit.as_deref(),
&indexed_at,
) {
record_index_failure(
store,
repo,
head_commit.clone(),
&worktree,
indexed_at.clone(),
&err.message,
);
return Err(err);
}
let exact_shard =
match exact_shard::write_ready(&paths.exact_index, repo, &state, &chunks) {
Ok(summary) => summary,
Err(err) => {
log::warn!(
"Exact shard write failed for {} (non-fatal, will rebuild on next search): {} ({})",
repo.name, err.message, err.code
);
exact_shard::error_summary(&paths.exact_index, &repo.name, &err.message)
}
};
Ok(RepoIndexOutput {
repo: repo.name.clone(),
git_root: repo.git_root.clone(),
head_commit,
dirty: worktree.dirty,
untracked: worktree.untracked,
modified: worktree.modified,
worktree_fingerprint: worktree.worktree_fingerprint,
status: INDEX_STATUS_READY.to_string(),
indexed_files: stats.indexed_files,
total_chunks: stats.total_chunks,
languages,
content_hash,
indexed_at,
exact_shard,
})
}
fn record_index_failure(
store: &store::Store,
repo: &RepoRecord,
head_commit: Option<String>,
worktree: &git::WorktreeStatus,
indexed_at: String,
error: &str,
) {
let failed = failed_index_state(repo, head_commit, worktree, indexed_at, error.to_string());
if let Err(store_err) = store.put_index_state(&failed) {
log::warn!(
"Failed to record index failure for {}: {} ({})",
repo.name,
store_err.message,
store_err.code
);
}
}
/// Builds an `IndexStateRecord` describing a failed indexing attempt.
///
/// The record has the failed status, zero indexed files and chunks, no
/// languages, no content hash, and `error` as its message. The repository
/// state fields are copied from the arguments.
fn failed_index_state(
    repo: &RepoRecord,
    head_commit: Option<String>,
    worktree: &git::WorktreeStatus,
    indexed_at: String,
    error: impl Into<String>,
) -> IndexStateRecord {
    let repo_name = repo.name.clone();
    let status = INDEX_STATUS_FAILED.to_string();
    let worktree_fingerprint = worktree.worktree_fingerprint.clone();
    let error = Some(error.into());

    IndexStateRecord {
        repo_name,
        status,
        head_commit,
        dirty: worktree.dirty,
        untracked: worktree.untracked,
        modified: worktree.modified,
        worktree_fingerprint,
        // A failed run contributes no indexed content.
        indexed_files: 0,
        total_chunks: 0,
        languages: BTreeMap::new(),
        content_hash: None,
        error,
        indexed_at,
    }
}
/// Builds the search index for `repo`.
///
/// A non-bare repository is indexed at its checked Git root. A bare
/// repository is indexed from a scratch checkout of its HEAD.
fn build_search_index(paths: &AsrPaths, repo: &RepoRecord) -> AsrResult<SourceIndex> {
    if !repo_is_bare(repo) {
        let git_root = checked_git_root(repo)?;
        return build_search_index_at(&git_root);
    }

    // The scratch checkout is removed when `checkout` is dropped.
    let checkout = materialize_bare_head(paths, repo)?;
    build_search_index_at(checkout.path())
}
/// Builds an uncached BM25 source index rooted at `root`.
///
/// # Errors
///
/// A build failure becomes `repo_no_supported_files` when the first line of
/// the error mentions "No supported files found", and `repo_index_failed`
/// otherwise. The error carries `root` as its path.
fn build_search_index_at(root: &Path) -> AsrResult<SourceIndex> {
    match SourceIndex::from_path_bm25_uncached(root, ASR_INCLUDE_TEXT_FILES) {
        Ok(index) => Ok(index),
        Err(err) => {
            let message = format_index_error(&err);
            let code = match message.contains("No supported files found") {
                true => "repo_no_supported_files",
                false => "repo_index_failed",
            };
            Err(AsrError::with_path(code, message, path_string(root)))
        }
    }
}
/// Builds the dependency graph for `repo`.
///
/// A non-bare repository is analysed at its checked Git root. A bare
/// repository is analysed from a scratch checkout of its HEAD.
fn build_registered_graph(
    paths: &AsrPaths,
    repo: &RepoRecord,
) -> AsrResult<crate::DependencyGraph> {
    if !repo_is_bare(repo) {
        let git_root = checked_git_root(repo)?;
        return build_registered_graph_at(&git_root);
    }

    // The scratch checkout is removed when `checkout` is dropped.
    let checkout = materialize_bare_head(paths, repo)?;
    build_registered_graph_at(checkout.path())
}
/// Builds the dependency graph rooted at `root`.
///
/// # Errors
///
/// A build failure becomes `repo_no_supported_files` when the first line of
/// the error mentions "No supported files found", and `graph_build_failed`
/// otherwise. The error carries `root` as its path.
fn build_registered_graph_at(root: &Path) -> AsrResult<crate::DependencyGraph> {
    match crate::build_dependency_graph(&path_string(root), ASR_INCLUDE_TEXT_FILES) {
        Ok(graph) => Ok(graph),
        Err(err) => {
            let message = format_index_error(&err);
            let code = match message.contains("No supported files found") {
                true => "repo_no_supported_files",
                false => "graph_build_failed",
            };
            Err(AsrError::with_path(code, message, path_string(root)))
        }
    }
}
/// Owns a scratch directory path; the `Drop` implementation removes the
/// directory tree when the value goes out of scope.
struct ScratchDir {
// Location of the scratch directory on disk.
path: PathBuf,
}
impl ScratchDir {
/// Returns the scratch directory's path.
fn path(&self) -> &Path {
&self.path
}
}
impl Drop for ScratchDir {
/// Removes the scratch directory tree on a best-effort basis.
fn drop(&mut self) {
// The result is deliberately discarded: `drop` cannot report an error.
let _ = fs::remove_dir_all(&self.path);
}
}
/// Checks out the HEAD tree of a bare repository into a new scratch
/// directory and returns the guard that owns it.
///
/// If the checkout fails, the guard is dropped on the error path, which
/// removes the scratch directory.
fn materialize_bare_head(paths: &AsrPaths, repo: &RepoRecord) -> AsrResult<ScratchDir> {
    let bare_root = checked_git_root(repo)?;
    let checkout = create_scratch_dir(paths, &repo.name)?;
    git::checkout_head_tree(&bare_root, checkout.path())?;
    Ok(checkout)
}
/// Creates a scratch directory under the ASR cache for a bare-repository
/// checkout.
///
/// The directory name combines the repository name, the process id, and the
/// current time in nanoseconds since the Unix epoch (0 if the clock reads
/// earlier than the epoch).
///
/// # Errors
///
/// Returns `bare_checkout_failed` when the directory cannot be created.
fn create_scratch_dir(paths: &AsrPaths, repo_name: &str) -> AsrResult<ScratchDir> {
    let unique = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let dir_name = format!(
        "bare-checkout-{repo_name}-{}-{unique}",
        std::process::id()
    );
    let path = paths.cache.join(dir_name);

    match fs::create_dir_all(&path) {
        Ok(()) => Ok(ScratchDir { path }),
        Err(err) => Err(AsrError::with_path(
            "bare_checkout_failed",
            format!("Failed to create ASR bare checkout cache: {err}"),
            path_string(&path),
        )),
    }
}
/// Renders `err` in alternate (`{:#}`) form and keeps only its first line.
///
/// Falls back to "index build failed" when the rendered text has no lines.
fn format_index_error(err: &anyhow::Error) -> String {
    let rendered = format!("{err:#}");
    match rendered.lines().next() {
        Some(first_line) => first_line.to_string(),
        None => "index build failed".to_string(),
    }
}
/// Summarizes a repository state as `"unborn"`, `"dirty"`, or `"clean"`.
///
/// A missing HEAD commit takes precedence over the dirty flag.
pub(crate) fn status_text(head_commit: Option<&str>, dirty: bool) -> &'static str {
    match (head_commit, dirty) {
        (None, _) => "unborn",
        (Some(_), true) => "dirty",
        (Some(_), false) => "clean",
    }
}
/// Converts the per-language counts into a key-ordered map so that iteration
/// and serialization order are deterministic.
fn stable_languages(
    languages: std::collections::HashMap<String, usize>,
) -> BTreeMap<String, usize> {
    let mut ordered = BTreeMap::new();
    for (language, count) in languages {
        ordered.insert(language, count);
    }
    ordered
}
fn search_index_state(
state: IndexStateRecord,
stale: bool,
exact_shard: ExactShardSummary,
) -> SearchIndexState {
SearchIndexState {
status: state.status,
head_commit: state.head_commit,
dirty: state.dirty,
modified: state.modified,
untracked: state.untracked,
worktree_fingerprint: state.worktree_fingerprint,
content_hash: state.content_hash,
indexed_at: state.indexed_at,
stale,
exact_shard,
}
}
/// Selects the repository record and source policy for a file-level command.
///
/// When `live` is `false`, a ready index snapshot is required and the policy
/// is derived from it. When `live` is `true`, only the registered repository
/// is required and the policy describes the current HEAD and worktree
/// fingerprint without binding to an index snapshot.
fn source_repo_for_file_command(
    paths: &AsrPaths,
    store: &store::Store,
    repo_name: &str,
    live: bool,
) -> AsrResult<(RepoRecord, SourcePolicy)> {
    if !live {
        let snapshot = require_ready_index_snapshot(paths, store, repo_name)?;
        let policy = snapshot::source_policy_from_ready(&snapshot);
        return Ok((snapshot.repo, policy));
    }

    let repo = require_repo(store, repo_name)?;
    let git_root = checked_git_root(&repo)?;
    let worktree = git::repository_status(&git_root, repo_is_bare(&repo))?;
    let head_commit = git::head_commit(&git_root);

    let policy = SourcePolicy {
        mode: "live_registered_source".to_string(),
        snapshot_bound: false,
        live: true,
        stale: false,
        head_commit,
        worktree_fingerprint: Some(worktree.worktree_fingerprint),
        index_state: None,
        note: "explicit --live read of the current registered source; result is not bound to the indexed chunk snapshot".to_string(),
    };
    Ok((repo, policy))
}
/// Returns the fixed `SourcePolicy` attached to diff output.
///
/// The policy uses mode `git_ref_diff`, is neither snapshot-bound, live, nor
/// stale, and carries no HEAD commit, worktree fingerprint, or index state.
fn git_ref_source_policy() -> SourcePolicy {
SourcePolicy {
mode: "git_ref_diff".to_string(),
snapshot_bound: false,
live: false,
stale: false,
head_commit: None,
worktree_fingerprint: None,
index_state: None,
note: "diff output is computed from explicit Git refs and hunk metadata; it is not bound to the ASR indexed chunk snapshot".to_string(),
}
}
/// Computes the content hash of a chunk list by forwarding to
/// `hash::content_hash_for_chunks`.
fn content_hash_for_chunks(chunks: &[Chunk]) -> String {
hash::content_hash_for_chunks(chunks)
}
/// Appends one verification check to `checks`.
///
/// `passed` is stored as the status string `"passed"` or `"failed"`.
fn push_check(checks: &mut Vec<VerifyCheck>, name: &str, passed: bool, message: impl Into<String>) {
    let status = match passed {
        true => "passed",
        false => "failed",
    };
    let check = VerifyCheck {
        name: name.to_string(),
        status: status.to_string(),
        message: message.into(),
    };
    checks.push(check);
}
/// Checks that a symbol-like token taken from the indexed chunks finds its
/// own file within the top 3 search results.
///
/// Returns `(false, reason)` without searching when no probe can be derived.
/// Search errors are propagated.
fn verify_exact_symbol_top3(snapshot: &ReadyIndexSnapshot) -> AsrResult<(bool, String)> {
    let (query, path) = match exact_symbol_probe(&snapshot.chunks) {
        Some(probe) => probe,
        None => {
            return Ok((
                false,
                "no quality symbol probe could be derived from indexed chunks".to_string(),
            ))
        }
    };

    let top_results = search_results_from_snapshot(&query, snapshot, 3)?;
    let found = top_results.iter().any(|hit| hit.chunk.file_path == path);
    let message = format!("query {query:?} should return {path} in the top 3 results");
    Ok((found, message))
}
/// Checks that a file-stem query taken from the indexed chunks finds its own
/// file within the top 3 search results.
///
/// Returns `(false, reason)` without searching when no probe can be derived.
/// Search errors are propagated.
fn verify_filename_top3(snapshot: &ReadyIndexSnapshot) -> AsrResult<(bool, String)> {
    let (query, path) = match filename_probe(&snapshot.chunks) {
        Some(probe) => probe,
        None => {
            return Ok((
                false,
                "no filename probe could be derived from indexed chunks".to_string(),
            ))
        }
    };

    let top_results = search_results_from_snapshot(&query, snapshot, 3)?;
    let found = top_results.iter().any(|hit| hit.chunk.file_path == path);
    let message = format!("filename query {query:?} should return {path} in the top 3 results");
    Ok((found, message))
}
/// Checks that a multi-token query taken from the indexed chunks finds its
/// own file within the top 5 search results.
///
/// Returns `(false, reason)` without searching when no probe can be derived.
/// Search errors are propagated.
fn verify_domain_query_top5(snapshot: &ReadyIndexSnapshot) -> AsrResult<(bool, String)> {
    let (query, path) = match domain_probe(&snapshot.chunks) {
        Some(probe) => probe,
        None => {
            return Ok((
                false,
                "no domain query probe could be derived from indexed chunks".to_string(),
            ))
        }
    };

    let top_results = search_results_from_snapshot(&query, snapshot, 5)?;
    let found = top_results.iter().any(|hit| hit.chunk.file_path == path);
    let message = format!("domain query {query:?} should return {path} in the top 5 results");
    Ok((found, message))
}
/// Checks that the local import graph can be built for the snapshot's
/// repository and contains at least one file.
///
/// A graph build failure is reported as a failed check with the error
/// message, not as an error.
fn verify_deps_impact_hints(snapshot: &ReadyIndexSnapshot, paths: &AsrPaths) -> (bool, String) {
    let graph = match build_registered_graph(paths, &snapshot.repo) {
        Ok(graph) => graph,
        Err(err) => {
            return (
                false,
                format!("local import graph could not be built: {}", err.message),
            )
        }
    };

    let has_files = graph.file_count() > 0;
    let message = format!(
        "local import graph built with {} files and {} edges",
        graph.file_count(),
        graph.edge_count()
    );
    (has_files, message)
}
/// Picks a `(query token, file path)` probe for the exact-symbol check.
///
/// The first pass prefers a token that looks like an identifier (it contains
/// `_` or an ASCII uppercase letter), scanning chunks in order and each
/// chunk's tokens in sorted order. If no chunk has such a token, the first
/// quality token of the first chunk that has any is used. Returns `None` when
/// no chunk yields a quality token.
fn exact_symbol_probe(chunks: &[Chunk]) -> Option<(String, String)> {
    fn looks_like_symbol(token: &str) -> bool {
        token.contains('_') || token.chars().any(|ch| ch.is_ascii_uppercase())
    }

    let preferred = chunks.iter().find_map(|chunk| {
        quality_tokens_from_text(&chunk.content)
            .into_iter()
            .find(|token| looks_like_symbol(token))
            .map(|token| (token, chunk.file_path.clone()))
    });
    if preferred.is_some() {
        return preferred;
    }

    // Fallback: any quality token at all.
    for chunk in chunks {
        if let Some(token) = quality_tokens_from_text(&chunk.content).into_iter().next() {
            return Some((token, chunk.file_path.clone()));
        }
    }
    None
}
/// Picks a `(file stem, file path)` probe for the filename check.
///
/// Returns the first chunk whose file stem is valid UTF-8 and passes
/// `is_quality_probe_token`. Chunks whose path has no usable stem are
/// skipped, so one odd path early in the list does not stop later chunks from
/// being considered. Returns `None` when no chunk qualifies.
fn filename_probe(chunks: &[Chunk]) -> Option<(String, String)> {
    chunks.iter().find_map(|chunk| {
        // `?` here only leaves the closure, so `find_map` moves on to the
        // next chunk.
        let stem = Path::new(&chunk.file_path).file_stem()?.to_str()?;
        if is_quality_probe_token(stem) {
            Some((stem.to_string(), chunk.file_path.clone()))
        } else {
            None
        }
    })
}
/// Picks a `(multi-token query, file path)` probe for the domain-query check.
///
/// For each chunk in order, the candidate tokens are the chunk's quality
/// tokens plus the file stem (when the stem is itself a quality token). They
/// are sorted case-insensitively and case-insensitive duplicates are dropped.
/// The first chunk with at least two candidates yields a query made of its
/// first three tokens joined by spaces.
fn domain_probe(chunks: &[Chunk]) -> Option<(String, String)> {
    chunks.iter().find_map(|chunk| {
        let mut tokens = quality_tokens_from_text(&chunk.content);

        let stem = Path::new(&chunk.file_path)
            .file_stem()
            .and_then(|stem| stem.to_str());
        match stem {
            // Front insertion keeps the stem ahead of an equal-keyed token
            // under the stable sort below.
            Some(stem) if is_quality_probe_token(stem) => tokens.insert(0, stem.to_string()),
            _ => {}
        }

        tokens.sort_by_key(|token| token.to_ascii_lowercase());
        tokens.dedup_by(|left, right| left.eq_ignore_ascii_case(right));

        if tokens.len() < 2 {
            return None;
        }
        tokens.truncate(3);
        Some((tokens.join(" "), chunk.file_path.clone()))
    })
}
fn quality_tokens_from_text(text: &str) -> Vec<String> {
let mut tokens = text
.split(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
.filter(|token| is_quality_probe_token(token))
.map(ToOwned::to_owned)
.collect::<Vec<_>>();
tokens.sort();
tokens.dedup();
tokens
}
/// Returns `true` when `token` is usable as a search probe: at least four
/// bytes long and not, ignoring ASCII case, one of the generic stop words
/// listed below.
fn is_quality_probe_token(token: &str) -> bool {
    // Generic keywords and filler names, compared in lowercase.
    const STOP_WORDS: &[&str] = &[
        "args",
        "bool",
        "call",
        "code",
        "const",
        "data",
        "else",
        "enum",
        "export",
        "false",
        "from",
        "func",
        "function",
        "impl",
        "import",
        "info",
        "init",
        "interface",
        "item",
        "list",
        "main",
        "match",
        "module",
        "name",
        "none",
        "null",
        "path",
        "private",
        "protocol",
        "public",
        "return",
        "self",
        "some",
        "static",
        "struct",
        "super",
        "test",
        "trait",
        "true",
        "type",
        "util",
        "where",
    ];

    if token.len() < 4 {
        return false;
    }
    let lower = token.to_ascii_lowercase();
    !STOP_WORDS.contains(&lower.as_str())
}
/// Runs the same probe search twice (top 8) and checks that both runs return
/// the same non-empty sequence of `(path, start line, end line)` triples.
///
/// Returns `Ok(false)` when no probe query can be derived. Search errors are
/// propagated.
fn verify_deterministic_probe_search(snapshot: &ReadyIndexSnapshot) -> AsrResult<bool> {
    let query = match probe_query_for_chunks(&snapshot.chunks) {
        Some(query) => query,
        None => return Ok(false),
    };

    // One search run reduced to the positions of the returned chunks.
    let run_probe = || -> AsrResult<Vec<_>> {
        let results = search_results_from_snapshot(&query, snapshot, 8)?;
        Ok(results
            .into_iter()
            .map(|hit| {
                (
                    hit.chunk.file_path,
                    hit.chunk.start_line,
                    hit.chunk.end_line,
                )
            })
            .collect())
    };

    let first = run_probe()?;
    let second = run_probe()?;
    Ok(!first.is_empty() && first == second)
}
/// Returns `true` when at least one indexed chunk yields a bounded range whose
/// estimate does not exceed `DEFAULT_CONTEXT_BUDGET`.
fn verify_budget_policy(snapshot: &ReadyIndexSnapshot) -> bool {
    for chunk in snapshot.chunks.iter() {
        let fits = bounded_chunk_range(chunk, &[], DEFAULT_CONTEXT_BUDGET)
            .map(|(_, _, estimated)| estimated <= DEFAULT_CONTEXT_BUDGET)
            .unwrap_or(false);
        if fits {
            return true;
        }
    }
    false
}
/// Derives a single-token probe query from the indexed chunks.
///
/// Chunks are scanned in order. For each chunk, the first content token of at
/// least three bytes wins; failing that, the file stem is used if it is at
/// least three bytes long. Tokens are split on every character that is not
/// ASCII alphanumeric or `_`. Returns `None` when no chunk provides either.
fn probe_query_for_chunks(chunks: &[Chunk]) -> Option<String> {
    chunks.iter().find_map(|chunk| {
        let content_token = chunk
            .content
            .split(|ch: char| !(ch.is_ascii_alphanumeric() || ch == '_'))
            .find(|token| token.len() >= 3);

        content_token
            .or_else(|| {
                Path::new(&chunk.file_path)
                    .file_stem()
                    .and_then(|stem| stem.to_str())
                    .filter(|stem| stem.len() >= 3)
            })
            .map(|probe| probe.to_string())
    })
}
/// Returns the sorted, distinct file paths among `chunks` that
/// `is_generated_or_build_path` flags.
fn generated_or_build_paths(chunks: &[Chunk]) -> Vec<String> {
    let flagged = chunks
        .iter()
        .map(|chunk| &chunk.file_path)
        .filter(|path| is_generated_or_build_path(path))
        .cloned()
        .collect::<BTreeSet<_>>();
    flagged.into_iter().collect()
}
/// Returns `true` when `path` looks like generated, build, or cache output.
///
/// The path is normalized to forward slashes and lowercase. It is flagged when
/// any path component is one of the excluded directory names, when the path
/// contains one of the generated-file markers, or when it ends with one of
/// the generated-file suffixes.
fn is_generated_or_build_path(path: &str) -> bool {
    const EXCLUDED_COMPONENTS: &[&str] = &[
        ".git",
        "node_modules",
        "target",
        ".build",
        "deriveddata",
        "dist",
        "build",
        ".cache",
        ".gradle",
        ".swiftpm",
    ];
    const GENERATED_MARKERS: &[&str] = &[".generated.", "_generated.", ".pb."];
    const GENERATED_SUFFIXES: &[&str] = &[".pb.rs", ".min.js"];

    let normalized = path.replace('\\', "/").to_ascii_lowercase();

    let in_excluded_dir = normalized
        .split('/')
        .any(|component| EXCLUDED_COMPONENTS.contains(&component));
    if in_excluded_dir {
        return true;
    }

    GENERATED_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
        || GENERATED_SUFFIXES
            .iter()
            .any(|suffix| normalized.ends_with(*suffix))
}
/// Resolves a requested path to an existing regular file inside `git_root`.
///
/// Returns the normalized relative path together with the canonical absolute
/// path of the file.
///
/// # Errors
///
/// - Any error from `normalize_requested_path`.
/// - `file_not_found` when the path does not exist, cannot be canonicalized,
///   or is not a regular file.
/// - `repo_unreadable` when the repository root cannot be canonicalized.
/// - `invalid_path` when the canonical file lies outside the canonical root.
fn resolve_existing_repo_file(git_root: &Path, requested: &str) -> AsrResult<(String, PathBuf)> {
    let relative_path = normalize_requested_path(requested)?;

    let candidate = git_root.join(&relative_path);
    if !candidate.exists() {
        return Err(AsrError::with_path(
            "file_not_found",
            "File does not exist in registered repository",
            relative_path,
        ));
    }

    let canonical_file = match candidate.canonicalize() {
        Ok(resolved) => resolved,
        Err(err) => {
            return Err(AsrError::with_path(
                "file_not_found",
                format!("File is unreadable: {err}"),
                relative_path.clone(),
            ))
        }
    };
    let canonical_root = match git_root.canonicalize() {
        Ok(resolved) => resolved,
        Err(err) => {
            return Err(AsrError::with_path(
                "repo_unreadable",
                format!("Registered repository root is unreadable: {err}"),
                path_string(git_root),
            ))
        }
    };

    // Canonicalization follows symlinks, so a link pointing outside the
    // repository is rejected here.
    let inside_repo = canonical_file.starts_with(&canonical_root);
    if !inside_repo {
        return Err(AsrError::with_path(
            "invalid_path",
            "Requested file resolves outside the registered repository",
            relative_path,
        ));
    }

    if canonical_file.is_file() {
        Ok((relative_path, canonical_file))
    } else {
        Err(AsrError::with_path(
            "file_not_found",
            "Requested path is not a file",
            relative_path,
        ))
    }
}
/// Reads the UTF-8 text of a requested file from a registered repository.
///
/// Bare repositories are delegated to `read_bare_source`. Otherwise the file
/// is resolved inside `git_root`, size-checked against
/// `MAX_READ_FILE_BYTES`, and read from disk. Returns the normalized relative
/// path and the file contents.
///
/// # Errors
///
/// - Any error from path resolution.
/// - `file_not_found` when metadata cannot be read.
/// - `file_too_large` when the file exceeds the read limit.
/// - `file_not_text` when the file cannot be read as UTF-8 text.
fn read_registered_source(
    repo: &RepoRecord,
    git_root: &Path,
    requested: &str,
) -> AsrResult<(String, String)> {
    if repo_is_bare(repo) {
        return read_bare_source(git_root, requested);
    }

    let (relative_path, full_path) = resolve_existing_repo_file(git_root, requested)?;

    let size = match fs::metadata(&full_path) {
        Ok(metadata) => metadata.len(),
        Err(err) => {
            return Err(AsrError::with_path(
                "file_not_found",
                format!("Unable to read file metadata: {err}"),
                relative_path.clone(),
            ))
        }
    };
    if size > MAX_READ_FILE_BYTES {
        return Err(AsrError::with_path(
            "file_too_large",
            format!("File exceeds ASR read limit of {MAX_READ_FILE_BYTES} bytes"),
            relative_path,
        ));
    }

    match fs::read_to_string(&full_path) {
        Ok(source) => Ok((relative_path, source)),
        Err(err) => Err(AsrError::with_path(
            "file_not_text",
            format!("File is not readable UTF-8 text: {err}"),
            relative_path.clone(),
        )),
    }
}
/// Reads the UTF-8 text of a requested file from the HEAD of a bare
/// repository.
///
/// The bytes come from `git::show_head_file`. Returns the normalized relative
/// path and the file contents.
///
/// # Errors
///
/// - Any error from path normalization or the Git read.
/// - `file_too_large` when the content exceeds `MAX_READ_FILE_BYTES`.
/// - `file_not_text` when the content is not valid UTF-8.
fn read_bare_source(git_root: &Path, requested: &str) -> AsrResult<(String, String)> {
    let relative_path = normalize_requested_path(requested)?;
    let bytes = git::show_head_file(git_root, &relative_path)?;

    let size = bytes.len() as u64;
    if size > MAX_READ_FILE_BYTES {
        return Err(AsrError::with_path(
            "file_too_large",
            format!("File exceeds ASR read limit of {MAX_READ_FILE_BYTES} bytes"),
            relative_path,
        ));
    }

    match String::from_utf8(bytes) {
        Ok(source) => Ok((relative_path, source)),
        Err(err) => Err(AsrError::with_path(
            "file_not_text",
            format!("File is not readable UTF-8 text: {err}"),
            relative_path.clone(),
        )),
    }
}
/// Renders `value` as a single shell word.
///
/// A non-empty value made only of ASCII alphanumerics and `_ - . / :` is
/// returned unchanged. Anything else, including the empty string, is wrapped
/// in single quotes, with each embedded single quote written as `'\''`.
fn shell_word(value: &str) -> String {
    let needs_quoting = value.is_empty()
        || value
            .chars()
            .any(|ch| !(ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-' | '.' | '/' | ':')));

    if needs_quoting {
        let escaped = value.replace('\'', "'\\''");
        format!("'{}'", escaped)
    } else {
        value.to_string()
    }
}
/// Returns the `DEFAULT_TOP_K` constant.
pub fn default_top_k() -> usize {
DEFAULT_TOP_K
}
/// Returns the `DEFAULT_CONTEXT_BUDGET` constant.
pub fn default_context_budget() -> usize {
DEFAULT_CONTEXT_BUDGET
}
/// Returns the current UTC time formatted with `to_rfc3339`.
fn now_rfc3339() -> String {
chrono::Utc::now().to_rfc3339()
}
/// Renders `path` as a string with every backslash replaced by a forward
/// slash. Non-UTF-8 content is converted lossily.
pub(crate) fn path_string(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}