use anyhow::Result;
use crate::core::chunker::RawChunk;
use crate::core::entity::EntityType;
use super::{build_compact_snippet, raw_to_code_chunk, CodeChunk, CodeIndexer};
impl CodeIndexer {
pub async fn find_chunk_id(&self, file_suffix: &str, function: Option<&str>) -> Option<String> {
let chunks = self.chunks.read().await;
let matching: Vec<&RawChunk> = chunks
.values()
.filter(|c| c.file.ends_with(file_suffix))
.filter(|c| match function {
Some(f) => c.function_name.as_deref() == Some(f),
None => true,
})
.collect();
matching
.into_iter()
.min_by_key(|c| c.start_line)
.map(|c| c.id.clone())
}
/// Converts every stored chunk into a [`CodeChunk`].
///
/// Each raw chunk is passed to `raw_to_code_chunk` with a score of `0.0`,
/// the tag `"all"` and no snippet. Results come back in whatever order the
/// chunk map yields its values.
pub async fn all_chunks(&self) -> Vec<CodeChunk> {
    let guard = self.chunks.read().await;
    let mut converted = Vec::with_capacity(guard.len());
    for raw in guard.values() {
        converted.push(raw_to_code_chunk(raw, 0.0, "all", None));
    }
    converted
}
/// Returns one page of chunks together with the total number of chunks.
///
/// Chunks are ordered by `file`, then `start_line`, then `end_line`, and
/// finally `id`. The trailing `id` comparison keeps the order (and therefore
/// page boundaries) stable across calls even when two chunks cover the same
/// span of the same file, provided ids are distinct.
///
/// * `offset` - number of chunks to skip from the start of that ordering.
/// * `limit`  - maximum number of chunks to return.
///
/// The first tuple element is always the total chunk count. The page is
/// empty when `limit` is `0` or `offset` is at or past the end. Arbitrarily
/// large `offset`/`limit` values are safe: the page is taken with
/// `skip`/`take`, so no `offset + limit` sum is ever computed.
pub async fn enumerate_chunks(&self, offset: usize, limit: usize) -> (usize, Vec<CodeChunk>) {
    let chunks = self.chunks.read().await;
    let total = chunks.len();
    if limit == 0 || offset >= total {
        return (total, Vec::new());
    }
    let mut ordered: Vec<&RawChunk> = chunks.values().collect();
    ordered.sort_by(|a, b| {
        a.file
            .cmp(&b.file)
            .then_with(|| a.start_line.cmp(&b.start_line))
            .then_with(|| a.end_line.cmp(&b.end_line))
            .then_with(|| a.id.cmp(&b.id))
    });
    let page: Vec<CodeChunk> = ordered
        .iter()
        .skip(offset)
        .take(limit)
        .map(|raw| raw_to_code_chunk(raw, 0.0, "enumerate", None))
        .collect();
    (total, page)
}
/// Returns up to `top_k` chunks whose vectors are closest to `embedding`.
///
/// * `embedding`  - query vector handed to `vector_search`.
/// * `top_k`      - maximum number of chunks to return; `0` yields an empty
///   result without running a search.
/// * `exclude_id` - optional chunk id to leave out of the results (typically
///   the chunk the embedding was taken from).
///
/// One extra hit is requested from `vector_search` so that dropping
/// `exclude_id` does not leave the result one short. Hits whose id is not
/// present in the chunk map are skipped. Every returned chunk carries the
/// hit's score, the tag `"vector"` and a snippet produced by
/// `build_compact_snippet` from the chunk content.
///
/// # Errors
///
/// Propagates any error returned by `vector_search`.
pub async fn similar_by_embedding(
    &self,
    embedding: &[f32],
    top_k: usize,
    exclude_id: Option<&str>,
) -> Result<Vec<CodeChunk>> {
    // Without this guard the loop below would push one chunk before it ever
    // compared `out.len()` against `top_k`.
    if top_k == 0 {
        return Ok(Vec::new());
    }
    let want = top_k.saturating_add(1);
    let hits = self.vector_search(embedding, want).await?;
    let chunks = self.chunks.read().await;
    // Bound the preallocation by the number of stored chunks so a huge
    // `top_k` cannot trigger a capacity-overflow panic.
    let mut out = Vec::with_capacity(top_k.min(chunks.len()));
    for (id, score) in hits {
        if Some(id.as_str()) == exclude_id {
            continue;
        }
        let Some(raw) = chunks.get(&id) else { continue };
        let snippet = Some(build_compact_snippet(&raw.content));
        out.push(raw_to_code_chunk(raw, score, "vector", snippet));
        if out.len() >= top_k {
            break;
        }
    }
    Ok(out)
}
/// Returns a clone of the entity list stored under `file_path`.
///
/// The lookup uses `file_path` as the exact key into the entity map; `None`
/// means the map has no entry for that key.
pub async fn entities_for(
    &self,
    file_path: &str,
) -> Option<Vec<crate::core::entity::RawEntity>> {
    let by_file = self.entities.read().await;
    by_file.get(file_path).cloned()
}
/// Finds a chunk that contains an entity whose text equals `query`.
///
/// The query is trimmed first; an empty query or one containing a space
/// character never matches. Only entities of type `NamedType` or
/// `ModulePath` are considered, and their text is compared ASCII
/// case-insensitively. For a matching entity, the first chunk from the same
/// file whose `start_line..=end_line` range covers the entity's line
/// supplies the returned id. If a matching entity has no covering chunk the
/// search moves on to the next entity.
///
/// Returns `None` when no entity/chunk pair is found.
pub(super) async fn entity_exact_match(&self, query: &str) -> Option<String> {
    let needle = query.trim();
    if needle.is_empty() {
        return None;
    }
    if needle.contains(' ') {
        return None;
    }
    let entities = self.entities.read().await;
    let chunks = self.chunks.read().await;
    entities
        .iter()
        .flat_map(|(file, ents)| ents.iter().map(move |ent| (file, ent)))
        .filter(|(_, ent)| {
            matches!(
                ent.entity_type,
                EntityType::NamedType | EntityType::ModulePath
            )
        })
        .filter(|(_, ent)| ent.text.eq_ignore_ascii_case(needle))
        .find_map(|(file, ent)| {
            chunks
                .values()
                .find(|c| {
                    c.file == *file && ent.line >= c.start_line && ent.line <= c.end_line
                })
                .map(|c| c.id.clone())
        })
}
/// Drops everything indexed for `file_path` and reports how many chunks were
/// selected for removal.
///
/// Chunk ids whose `file` equals `file_path` are gathered under a read lock
/// that is released before any mutation starts. Those ids are then removed
/// via `remove_chunks_from_stores`, the file's entry in the entity map is
/// deleted, and the symbol graph is rebuilt.
pub async fn remove_file(&self, file_path: &str) -> Result<usize> {
    let mut doomed: Vec<String> = Vec::new();
    {
        // Scoped so the read guard is gone before the write locks are taken.
        let chunks = self.chunks.read().await;
        for chunk in chunks.values() {
            if chunk.file == file_path {
                doomed.push(chunk.id.clone());
            }
        }
    }
    self.remove_chunks_from_stores(&doomed).await;
    self.entities.write().await.remove(file_path);
    self.rebuild_symbol_graph().await;
    Ok(doomed.len())
}
/// Removes the given chunk ids from every backing structure.
///
/// Order of operations: the optional `store` (when present), then the chunk
/// map, the embedding cache and finally the BM25 index. Each lock is held
/// only for its own pass over `ids`. Errors returned by the store are
/// discarded, so removal there is best-effort.
async fn remove_chunks_from_stores(&self, ids: &[String]) {
    if let Some(store) = self.store.as_ref() {
        for id in ids {
            // Best-effort: a failed store removal must not stop the rest.
            let _ = store.remove(id).await.ok();
        }
    }
    {
        let mut chunk_map = self.chunks.write().await;
        ids.iter().for_each(|id| {
            chunk_map.remove(id);
        });
    }
    {
        let mut embeddings = self.chunk_embeddings.write().await;
        ids.iter().for_each(|id| {
            embeddings.pop(id);
        });
    }
    {
        let mut index = self.bm25.write().await;
        ids.iter().for_each(|id| {
            index.remove_document(id);
        });
    }
}
/// Removes a single chunk by id and rebuilds the symbol graph.
///
/// The chunk is removed from the optional store, the chunk map, the
/// embedding cache and the BM25 index, in that order, by delegating to
/// `remove_chunks_from_stores` with a one-element id list. Store errors are
/// ignored there, so this method always returns `Ok(())`.
pub async fn remove_chunk(&self, chunk_id: &str) -> Result<()> {
    let ids = [chunk_id.to_owned()];
    self.remove_chunks_from_stores(&ids).await;
    self.rebuild_symbol_graph().await;
    Ok(())
}
}