#![allow(clippy::await_holding_lock)]
use std::path::Path;
use crate::embedder::Embedding;
use crate::parser::Chunk;
use crate::store::helpers::{embedding_to_bytes, StoreError};
use crate::store::Store;
use super::async_helpers::{batch_insert_chunks, snapshot_content_hashes, upsert_fts_conditional};
impl Store {
/// Looks up a single value in the `metadata` key/value table.
///
/// Returns `StoreError::NotFound` when no row exists for `key`.
pub fn get_metadata(&self, key: &str) -> Result<String, StoreError> {
    let _span = tracing::debug_span!("get_metadata", key = %key).entered();
    self.rt.block_on(async {
        let fetched: Option<(String,)> =
            sqlx::query_as("SELECT value FROM metadata WHERE key = ?1")
                .bind(key)
                .fetch_optional(&self.pool)
                .await?;
        match fetched {
            Some((value,)) => Ok(value),
            None => Err(StoreError::NotFound(format!("metadata key '{}'", key))),
        }
    })
}
/// Inserts or replaces a batch of chunks together with their embeddings.
///
/// All rows are written inside one write transaction; FTS rows are only
/// rewritten for chunks whose content hash actually changed (see
/// `upsert_fts_conditional`).
///
/// Returns the number of chunks submitted (not rows changed).
pub fn upsert_chunks_batch(
    &self,
    chunks: &[(Chunk, Embedding)],
    source_mtime: Option<i64>,
) -> Result<usize, StoreError> {
    let _span = tracing::info_span!("upsert_chunks_batch", count = chunks.len()).entered();
    // Empty batch: nothing to write, so skip taking the write lock and
    // opening a no-op transaction (mirrors the early returns in
    // update_embeddings_batch / upsert_summaries_batch).
    if chunks.is_empty() {
        return Ok(0);
    }
    let dim = self.dim;
    // Serialize every embedding up front so a dimension mismatch fails
    // before any transaction is started.
    let embedding_bytes: Vec<Vec<u8>> = chunks
        .iter()
        .map(|(_, emb)| embedding_to_bytes(emb, dim))
        .collect::<Result<Vec<_>, _>>()?;
    self.rt.block_on(async {
        let (_guard, mut tx) = self.begin_write().await?;
        // Snapshot the existing content hashes so the FTS upsert below can
        // skip chunks whose text is unchanged.
        let old_hashes = snapshot_content_hashes(&mut tx, chunks).await?;
        let now = chrono::Utc::now().to_rfc3339();
        batch_insert_chunks(&mut tx, chunks, &embedding_bytes, source_mtime, &now).await?;
        upsert_fts_conditional(&mut tx, chunks, &old_hashes).await?;
        tx.commit().await?;
        Ok(chunks.len())
    })
}
/// Convenience wrapper around [`Self::upsert_chunks_batch`] for one chunk.
pub fn upsert_chunk(
    &self,
    chunk: &Chunk,
    embedding: &Embedding,
    source_mtime: Option<i64>,
) -> Result<(), StoreError> {
    let _span = tracing::info_span!("upsert_chunk", name = %chunk.name).entered();
    let single = [(chunk.clone(), embedding.clone())];
    self.upsert_chunks_batch(&single, source_mtime).map(|_| ())
}
/// Updates embeddings by chunk id without touching enrichment hashes.
///
/// Delegates to [`Self::update_embeddings_with_hashes_batch`] with every
/// hash slot set to `None` (which leaves the stored hash unchanged).
pub fn update_embeddings_batch(
    &self,
    updates: &[(String, Embedding)],
) -> Result<usize, StoreError> {
    if updates.is_empty() {
        tracing::debug!("update_embeddings_batch called with empty batch, skipping");
        return Ok(0);
    }
    let mut with_none: Vec<(String, Embedding, Option<String>)> =
        Vec::with_capacity(updates.len());
    for (id, emb) in updates {
        with_none.push((id.clone(), emb.clone(), None));
    }
    self.update_embeddings_with_hashes_batch(&with_none)
}
/// Overwrites the stored embedding — and optionally the enrichment hash —
/// for each chunk id in `updates`.
///
/// The third tuple element controls the hash: `Some(h)` replaces
/// `enrichment_hash`, `None` keeps the existing value (via the COALESCE in
/// the final UPDATE).
///
/// Returns the number of chunk rows actually updated; unknown ids are
/// skipped and only reported at debug level.
pub fn update_embeddings_with_hashes_batch(
    &self,
    updates: &[(String, Embedding, Option<String>)],
) -> Result<usize, StoreError> {
    let _span =
        tracing::info_span!("update_embeddings_with_hashes_batch", count = updates.len())
            .entered();
    if updates.is_empty() {
        return Ok(0);
    }
    let dim = self.dim;
    // Serialize all embeddings up front so a dimension mismatch fails
    // before the write transaction is opened.
    let embedding_bytes: Vec<Vec<u8>> = updates
        .iter()
        .map(|(_, emb, _)| embedding_to_bytes(emb, dim))
        .collect::<Result<Vec<_>, _>>()?;
    self.rt.block_on(async {
        let (_guard, mut tx) = self.begin_write().await?;
        // Stage the updates in a temp table so the final write is a single
        // UPDATE ... FROM join instead of one statement per chunk.
        sqlx::query(
            "CREATE TEMP TABLE IF NOT EXISTS _update_embeddings \
             (id TEXT PRIMARY KEY, embedding BLOB NOT NULL, enrichment_hash TEXT)",
        )
        .execute(&mut *tx)
        .await?;
        // Temp tables are per-connection and may survive a previous call on
        // the same pooled connection; clear any stale rows first.
        sqlx::query("DELETE FROM _update_embeddings")
            .execute(&mut *tx)
            .await?;
        // 100 rows x 3 binds = 300 variables per statement — presumably
        // below this build's SQLite bind-variable limit (cf.
        // max_rows_per_statement used elsewhere; TODO confirm).
        const BATCH_SIZE: usize = 100;
        for batch_start in (0..updates.len()).step_by(BATCH_SIZE) {
            let batch_end = (batch_start + BATCH_SIZE).min(updates.len());
            let batch = &updates[batch_start..batch_end];
            let batch_bytes = &embedding_bytes[batch_start..batch_end];
            let mut placeholders = Vec::with_capacity(batch.len());
            for i in 0..batch.len() {
                // Positional placeholders: row i uses ?(3i+1)..?(3i+3).
                let base = i * 3;
                placeholders.push(format!("(?{}, ?{}, ?{})", base + 1, base + 2, base + 3));
            }
            let sql = format!(
                "INSERT INTO _update_embeddings (id, embedding, enrichment_hash) VALUES {}",
                placeholders.join(", ")
            );
            let mut query = sqlx::query(&sql);
            // Bind order must match the placeholder layout built above.
            for (i, (id, _, hash)) in batch.iter().enumerate() {
                query = query.bind(id);
                query = query.bind(&batch_bytes[i]);
                query = query.bind(hash.as_deref());
            }
            query.execute(&mut *tx).await?;
        }
        // Single joined update; COALESCE preserves the old enrichment_hash
        // where the staged value is NULL. NOTE(review): UPDATE ... FROM
        // requires SQLite 3.33+ — presumably satisfied by the bundled
        // library; confirm minimum supported SQLite version.
        let result = sqlx::query(
            "UPDATE chunks SET \
             embedding = t.embedding, \
             enrichment_hash = COALESCE(t.enrichment_hash, chunks.enrichment_hash) \
             FROM _update_embeddings t \
             WHERE chunks.id = t.id",
        )
        .execute(&mut *tx)
        .await?;
        let updated = result.rows_affected() as usize;
        if updated < updates.len() {
            // Some requested ids had no matching chunk row.
            let missing = updates.len() - updated;
            tracing::debug!(missing, "Enrichment update: some chunk IDs not found");
        }
        // Drop the staging table inside the transaction so nothing lingers
        // on the pooled connection.
        sqlx::query("DROP TABLE IF EXISTS _update_embeddings")
            .execute(&mut *tx)
            .await?;
        tx.commit().await?;
        Ok(updated)
    })
}
/// Fetches the stored enrichment hash for each of the given chunk ids.
///
/// Ids without a stored hash are simply absent from the returned map.
pub fn get_enrichment_hashes_batch(
    &self,
    chunk_ids: &[&str],
) -> Result<std::collections::HashMap<String, String>, StoreError> {
    let _span =
        tracing::debug_span!("get_enrichment_hashes_batch", count = chunk_ids.len()).entered();
    if chunk_ids.is_empty() {
        return Ok(std::collections::HashMap::new());
    }
    self.rt.block_on(async {
        use crate::store::helpers::sql::max_rows_per_statement;
        let mut hashes = std::collections::HashMap::new();
        // Query in batches so we never exceed the bind-variable limit.
        for ids in chunk_ids.chunks(max_rows_per_statement(1)) {
            let placeholders = crate::store::helpers::make_placeholders(ids.len());
            let sql = format!(
                "SELECT id, enrichment_hash FROM chunks WHERE id IN ({}) AND enrichment_hash IS NOT NULL",
                placeholders
            );
            let mut stmt = sqlx::query_as::<_, (String, String)>(&sql);
            for id in ids {
                stmt = stmt.bind(*id);
            }
            hashes.extend(stmt.fetch_all(&self.pool).await?);
        }
        Ok(hashes)
    })
}
/// Returns every stored (chunk id, enrichment hash) pair as a map.
pub fn get_all_enrichment_hashes(
    &self,
) -> Result<std::collections::HashMap<String, String>, StoreError> {
    let _span = tracing::debug_span!("get_all_enrichment_hashes").entered();
    self.rt.block_on(async {
        let pairs: Vec<(String, String)> = sqlx::query_as(
            "SELECT id, enrichment_hash FROM chunks WHERE enrichment_hash IS NOT NULL",
        )
        .fetch_all(&self.pool)
        .await?;
        let mut map = std::collections::HashMap::with_capacity(pairs.len());
        for (id, hash) in pairs {
            map.insert(id, hash);
        }
        Ok(map)
    })
}
/// Fetches stored summaries for the given content hashes, restricted to one
/// `purpose` (e.g. `summary` vs `doc-comment`).
///
/// Hashes with no matching row are simply absent from the returned map.
pub fn get_summaries_by_hashes(
    &self,
    content_hashes: &[&str],
    purpose: &str,
) -> Result<std::collections::HashMap<String, String>, StoreError> {
    let _span = tracing::debug_span!(
        "get_summaries_by_hashes",
        count = content_hashes.len(),
        purpose
    )
    .entered();
    if content_hashes.is_empty() {
        return Ok(std::collections::HashMap::new());
    }
    self.rt.block_on(async {
        let mut result = std::collections::HashMap::new();
        use crate::store::helpers::sql::max_rows_per_statement;
        // `- 1` reserves one bind slot per statement for the trailing
        // `purpose` parameter.
        for batch in content_hashes.chunks(max_rows_per_statement(1) - 1) {
            let placeholders = crate::store::helpers::make_placeholders(batch.len());
            // `purpose` is the placeholder right after the IN list, hence
            // index `batch.len() + 1`.
            let sql = format!(
                "SELECT content_hash, summary FROM llm_summaries WHERE content_hash IN ({}) AND purpose = ?{}",
                placeholders,
                batch.len() + 1
            );
            let mut query = sqlx::query_as::<_, (String, String)>(&sql);
            for hash in batch {
                query = query.bind(*hash);
            }
            query = query.bind(purpose);
            let rows = query.fetch_all(&self.pool).await?;
            for (hash, summary) in rows {
                result.insert(hash, summary);
            }
        }
        Ok(result)
    })
}
/// Writes `(content_hash, summary, model, purpose)` rows into
/// `llm_summaries`, replacing any existing row with the same key.
///
/// Returns the number of summaries submitted.
pub fn upsert_summaries_batch(
    &self,
    summaries: &[(String, String, String, String)],
) -> Result<usize, StoreError> {
    let _span =
        tracing::debug_span!("upsert_summaries_batch", count = summaries.len()).entered();
    if summaries.is_empty() {
        return Ok(0);
    }
    let now = chrono::Utc::now().to_rfc3339();
    self.rt.block_on(async {
        use crate::store::helpers::sql::max_rows_per_statement;
        let (_guard, mut tx) = self.begin_write().await?;
        // Five bind variables per row, so cap each statement accordingly.
        const BATCH_SIZE: usize = max_rows_per_statement(5);
        for rows in summaries.chunks(BATCH_SIZE) {
            let mut builder: sqlx::QueryBuilder<sqlx::Sqlite> = sqlx::QueryBuilder::new(
                "INSERT OR REPLACE INTO llm_summaries (content_hash, summary, model, purpose, created_at)",
            );
            builder.push_values(rows.iter(), |mut row, (hash, summary, model, purpose)| {
                row.push_bind(hash)
                    .push_bind(summary)
                    .push_bind(model)
                    .push_bind(purpose)
                    .push_bind(&now);
            });
            builder.build().execute(&mut *tx).await?;
        }
        tx.commit().await?;
        Ok(summaries.len())
    })
}
/// Returns every `(content_hash, summary)` pair stored for `purpose`.
pub fn get_all_summaries(
    &self,
    purpose: &str,
) -> Result<std::collections::HashMap<String, String>, StoreError> {
    let _span = tracing::debug_span!("get_all_summaries", purpose).entered();
    self.rt.block_on(async {
        let rows = sqlx::query_as::<_, (String, String)>(
            "SELECT content_hash, summary FROM llm_summaries WHERE purpose = ?1",
        )
        .bind(purpose)
        .fetch_all(&self.pool)
        .await?;
        Ok(rows.into_iter().collect())
    })
}
/// Lists each distinct content hash currently present in `chunks`.
pub fn get_all_content_hashes(&self) -> Result<Vec<String>, StoreError> {
    let _span = tracing::debug_span!("get_all_content_hashes").entered();
    self.rt.block_on(async {
        let rows: Vec<(String,)> = sqlx::query_as("SELECT DISTINCT content_hash FROM chunks")
            .fetch_all(&self.pool)
            .await?;
        let mut hashes = Vec::with_capacity(rows.len());
        for (hash,) in rows {
            hashes.push(hash);
        }
        Ok(hashes)
    })
}
/// Deletes summaries whose content hash no longer matches any chunk.
///
/// Returns how many orphaned rows were removed.
pub fn prune_orphan_summaries(&self) -> Result<usize, StoreError> {
    let _span = tracing::debug_span!("prune_orphan_summaries").entered();
    self.rt.block_on(async {
        let outcome = sqlx::query(
            "DELETE FROM llm_summaries WHERE content_hash NOT IN \
             (SELECT DISTINCT content_hash FROM chunks)",
        )
        .execute(&self.pool)
        .await?;
        Ok(outcome.rows_affected() as usize)
    })
}
pub fn get_all_summaries_full(
&self,
) -> Result<Vec<(String, String, String, String)>, StoreError> {
let _span = tracing::debug_span!("get_all_summaries_full").entered();
self.rt.block_on(async {
let rows: Vec<(String, String, String, String)> =
sqlx::query_as("SELECT content_hash, summary, model, purpose FROM llm_summaries")
.fetch_all(&self.pool)
.await?;
Ok(rows)
})
}
/// Decides whether `path` must be re-indexed.
///
/// Returns `Some(mtime_ms)` — the file's current modification time in
/// milliseconds since the Unix epoch — when no chunks are stored for the
/// file or the stored mtime is older than the file on disk; returns `None`
/// when the stored data is already up to date.
pub fn needs_reindex(&self, path: &Path) -> Result<Option<i64>, StoreError> {
    let _span = tracing::debug_span!("needs_reindex", path = %path.display()).entered();
    let current_mtime = path
        .metadata()?
        .modified()?
        .duration_since(std::time::UNIX_EPOCH)
        .map_err(|_| StoreError::SystemTime)?
        .as_millis() as i64;
    self.rt.block_on(async {
        // LIMIT 1: presumably every chunk of a file carries the same
        // source_mtime, so one row suffices.
        let stored: Option<(Option<i64>,)> =
            sqlx::query_as("SELECT source_mtime FROM chunks WHERE origin = ?1 LIMIT 1")
                .bind(crate::normalize_path(path))
                .fetch_optional(&self.pool)
                .await?;
        if let Some((Some(stored_mtime),)) = stored {
            if stored_mtime >= current_mtime {
                return Ok(None);
            }
        }
        Ok(Some(current_mtime))
    })
}
/// Removes every chunk (and its FTS row) whose origin equals `origin`.
///
/// Returns the number of chunk rows deleted.
pub fn delete_by_origin(&self, origin: &Path) -> Result<u32, StoreError> {
    let _span = tracing::info_span!("delete_by_origin", origin = %origin.display()).entered();
    let origin_str = crate::normalize_path(origin);
    self.rt.block_on(async {
        let (_guard, mut tx) = self.begin_write().await?;
        // Delete FTS rows first, while the chunk ids still exist to join on.
        sqlx::query(
            "DELETE FROM chunks_fts WHERE id IN (SELECT id FROM chunks WHERE origin = ?1)",
        )
        .bind(&origin_str)
        .execute(&mut *tx)
        .await?;
        let deleted = sqlx::query("DELETE FROM chunks WHERE origin = ?1")
            .bind(&origin_str)
            .execute(&mut *tx)
            .await?
            .rows_affected();
        tx.commit().await?;
        Ok(deleted as u32)
    })
}
/// Upserts `chunks` (as in [`Self::upsert_chunks_batch`]) and, inside the
/// same transaction, replaces the recorded call sites for every caller id
/// that appears in `calls`.
///
/// Returns the number of chunks submitted.
pub fn upsert_chunks_and_calls(
    &self,
    chunks: &[(Chunk, Embedding)],
    source_mtime: Option<i64>,
    calls: &[(String, crate::parser::CallSite)],
) -> Result<usize, StoreError> {
    let _span = tracing::info_span!(
        "upsert_chunks_and_calls",
        chunks = chunks.len(),
        calls = calls.len()
    )
    .entered();
    let dim = self.dim;
    // Serialize embeddings before opening the transaction so a dimension
    // mismatch fails early.
    let embedding_bytes: Vec<Vec<u8>> = chunks
        .iter()
        .map(|(_, emb)| embedding_to_bytes(emb, dim))
        .collect::<Result<Vec<_>, _>>()?;
    self.rt.block_on(async {
        let (_guard, mut tx) = self.begin_write().await?;
        // Snapshot hashes so FTS rows are only rewritten for changed chunks.
        let old_hashes = snapshot_content_hashes(&mut tx, chunks).await?;
        let now = chrono::Utc::now().to_rfc3339();
        batch_insert_chunks(&mut tx, chunks, &embedding_bytes, source_mtime, &now).await?;
        upsert_fts_conditional(&mut tx, chunks, &old_hashes).await?;
        if !calls.is_empty() {
            // Deduplicate caller ids (first-seen order) so each caller's
            // stale call rows are deleted exactly once.
            let unique_ids: Vec<&str> = {
                let mut seen = std::collections::HashSet::new();
                calls
                    .iter()
                    .filter_map(|(id, _)| {
                        if seen.insert(id.as_str()) {
                            Some(id.as_str())
                        } else {
                            None
                        }
                    })
                    .collect()
            };
            // Delete in batches to stay under the bind-variable limit.
            for batch in
                unique_ids.chunks(crate::store::helpers::sql::max_rows_per_statement(1))
            {
                let placeholders: String = batch
                    .iter()
                    .enumerate()
                    .map(|(i, _)| format!("?{}", i + 1))
                    .collect::<Vec<_>>()
                    .join(",");
                let sql = format!("DELETE FROM calls WHERE caller_id IN ({})", placeholders);
                let mut query = sqlx::query(&sql);
                for id in batch {
                    query = query.bind(*id);
                }
                query.execute(&mut *tx).await?;
            }
            // 300 rows x 3 binds = 900 variables per statement — presumably
            // under this build's limit; TODO confirm against
            // max_rows_per_statement(3) for consistency.
            const INSERT_BATCH: usize = 300;
            for batch in calls.chunks(INSERT_BATCH) {
                let mut query_builder: sqlx::QueryBuilder<sqlx::Sqlite> =
                    sqlx::QueryBuilder::new(
                        "INSERT INTO calls (caller_id, callee_name, line_number) ",
                    );
                query_builder.push_values(batch.iter(), |mut b, (chunk_id, call)| {
                    b.push_bind(chunk_id)
                        .push_bind(&call.callee_name)
                        .push_bind(call.line_number as i64);
                });
                query_builder.build().execute(&mut *tx).await?;
            }
        }
        tx.commit().await?;
        Ok(chunks.len())
    })
}
/// Deletes chunks of `file` whose ids are no longer in `live_ids`
/// ("phantom" chunks left behind after an edit removed or renamed symbols).
///
/// An empty `live_ids` deletes every chunk of the file.
/// Returns the number of chunks removed.
pub fn delete_phantom_chunks(
    &self,
    file: &std::path::Path,
    live_ids: &[&str],
) -> Result<u32, StoreError> {
    let _span =
        tracing::info_span!("delete_phantom_chunks", ?file, live_count = live_ids.len())
            .entered();
    let origin_str = crate::normalize_path(file);
    // No survivors: the whole file's chunks are phantoms.
    if live_ids.is_empty() {
        return self.delete_by_origin(file);
    }
    self.rt.block_on(async {
        let (_guard, mut tx) = self.begin_write().await?;
        // Stage live ids in a temp table so the deletes below can use one
        // NOT IN subquery regardless of how many ids there are.
        sqlx::query("CREATE TEMP TABLE IF NOT EXISTS _live_ids (id TEXT PRIMARY KEY)")
            .execute(&mut *tx)
            .await?;
        // The temp table is per-connection and could hold rows from an
        // earlier call on the same pooled connection; clear it first.
        sqlx::query("DELETE FROM _live_ids")
            .execute(&mut *tx)
            .await?;
        for batch in live_ids.chunks(crate::store::helpers::sql::max_rows_per_statement(1)) {
            let placeholders: Vec<String> =
                batch.iter().enumerate().map(|(i, _)| format!("(?{})", i + 1)).collect();
            let insert_sql = format!(
                "INSERT OR IGNORE INTO _live_ids (id) VALUES {}",
                placeholders.join(",")
            );
            let mut stmt = sqlx::query(&insert_sql);
            for id in batch {
                stmt = stmt.bind(id);
            }
            stmt.execute(&mut *tx).await?;
        }
        // FTS rows first, while the chunk rows still identify them.
        let fts_query =
            "DELETE FROM chunks_fts WHERE id IN \
             (SELECT id FROM chunks WHERE origin = ?1 AND id NOT IN (SELECT id FROM _live_ids))";
        sqlx::query(fts_query)
            .bind(&origin_str)
            .execute(&mut *tx)
            .await?;
        let chunks_query =
            "DELETE FROM chunks WHERE origin = ?1 AND id NOT IN (SELECT id FROM _live_ids)";
        let result = sqlx::query(chunks_query)
            .bind(&origin_str)
            .execute(&mut *tx)
            .await?;
        // Drop the staging table before committing, mirroring the
        // `_update_embeddings` lifecycle, so no stale state lingers on the
        // pooled connection.
        sqlx::query("DROP TABLE IF EXISTS _live_ids")
            .execute(&mut *tx)
            .await?;
        tx.commit().await?;
        let deleted = result.rows_affected() as u32;
        if deleted > 0 {
            tracing::info!(origin = %origin_str, deleted, "Removed phantom chunks");
        }
        Ok(deleted)
    })
}
}
#[cfg(test)]
mod tests {
    // Integration-style tests against a store built by the project helper
    // `setup_store()`; `_dir` keeps the backing temp directory alive for
    // the duration of each test.
    use super::super::test_utils::make_chunk;
    use crate::test_helpers::{mock_embedding, setup_store};
    // Two fresh chunks should be visible via stats and chunk_count.
    #[test]
    fn test_upsert_chunks_batch_insert_and_fetch() {
        let (store, _dir) = setup_store();
        let c1 = make_chunk("alpha", "src/a.rs");
        let c2 = make_chunk("beta", "src/b.rs");
        let emb = mock_embedding(1.0);
        let count = store
            .upsert_chunks_batch(
                &[(c1.clone(), emb.clone()), (c2.clone(), emb.clone())],
                Some(100),
            )
            .unwrap();
        assert_eq!(count, 2);
        let stats = store.stats().unwrap();
        assert_eq!(stats.total_chunks, 2);
        assert_eq!(stats.total_files, 2);
        assert_eq!(store.chunk_count().unwrap(), 2);
    }
    // Re-upserting the same chunk replaces the row instead of adding one.
    #[test]
    fn test_upsert_chunks_batch_updates_existing() {
        let (store, _dir) = setup_store();
        let c1 = make_chunk("alpha", "src/a.rs");
        let emb1 = mock_embedding(1.0);
        store
            .upsert_chunks_batch(&[(c1.clone(), emb1)], Some(100))
            .unwrap();
        let emb2 = mock_embedding(2.0);
        store
            .upsert_chunks_batch(&[(c1.clone(), emb2.clone())], Some(200))
            .unwrap();
        assert_eq!(store.chunk_count().unwrap(), 1);
        let found = store.get_embeddings_by_hashes(&[&c1.content_hash]).unwrap();
        assert!(found.contains_key(&c1.content_hash));
    }
    // An empty batch is a no-op that reports zero chunks.
    #[test]
    fn test_upsert_chunks_batch_empty() {
        let (store, _dir) = setup_store();
        let count = store.upsert_chunks_batch(&[], Some(100)).unwrap();
        assert_eq!(count, 0);
        assert_eq!(store.chunk_count().unwrap(), 0);
    }
    // Empty hash list short-circuits to an empty map.
    #[test]
    fn test_get_summaries_empty_input() {
        let (store, _dir) = setup_store();
        let result = store.get_summaries_by_hashes(&[], "summary").unwrap();
        assert!(result.is_empty());
    }
    // Everything written by upsert_summaries_batch comes back by hash.
    #[test]
    fn test_get_summaries_roundtrip() {
        let (store, _dir) = setup_store();
        let summaries = vec![
            (
                "hash_a".to_string(),
                "summary A".to_string(),
                "model-1".to_string(),
                "summary".to_string(),
            ),
            (
                "hash_b".to_string(),
                "summary B".to_string(),
                "model-1".to_string(),
                "summary".to_string(),
            ),
            (
                "hash_c".to_string(),
                "summary C".to_string(),
                "model-1".to_string(),
                "summary".to_string(),
            ),
        ];
        store.upsert_summaries_batch(&summaries).unwrap();
        let result = store
            .get_summaries_by_hashes(&["hash_a", "hash_b", "hash_c"], "summary")
            .unwrap();
        assert_eq!(result.len(), 3);
        assert_eq!(result["hash_a"], "summary A");
        assert_eq!(result["hash_b"], "summary B");
        assert_eq!(result["hash_c"], "summary C");
    }
    // Unknown hashes yield an empty map rather than an error.
    #[test]
    fn test_get_summaries_missing_keys() {
        let (store, _dir) = setup_store();
        let result = store
            .get_summaries_by_hashes(&["nonexistent_1", "nonexistent_2"], "summary")
            .unwrap();
        assert!(result.is_empty());
    }
    // A mixed request returns only the hashes that exist.
    #[test]
    fn test_get_summaries_mixed() {
        let (store, _dir) = setup_store();
        let summaries = vec![
            (
                "h1".to_string(),
                "s1".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
            (
                "h2".to_string(),
                "s2".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
            (
                "h3".to_string(),
                "s3".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
        ];
        store.upsert_summaries_batch(&summaries).unwrap();
        let result = store
            .get_summaries_by_hashes(&["h1", "h2", "h3", "h4", "h5"], "summary")
            .unwrap();
        assert_eq!(result.len(), 3);
        assert!(result.contains_key("h1"));
        assert!(result.contains_key("h2"));
        assert!(result.contains_key("h3"));
        assert!(!result.contains_key("h4"));
    }
    // Empty summary batch is a counted no-op.
    #[test]
    fn test_upsert_summaries_empty() {
        let (store, _dir) = setup_store();
        let count = store.upsert_summaries_batch(&[]).unwrap();
        assert_eq!(count, 0);
    }
    // INSERT OR REPLACE semantics: the second write wins.
    #[test]
    fn test_upsert_summaries_overwrites() {
        let (store, _dir) = setup_store();
        store
            .upsert_summaries_batch(&[(
                "h1".to_string(),
                "first".to_string(),
                "m".to_string(),
                "summary".to_string(),
            )])
            .unwrap();
        store
            .upsert_summaries_batch(&[(
                "h1".to_string(),
                "second".to_string(),
                "m".to_string(),
                "summary".to_string(),
            )])
            .unwrap();
        let result = store.get_summaries_by_hashes(&["h1"], "summary").unwrap();
        assert_eq!(result["h1"], "second");
    }
    // Fresh store has no summaries for any purpose.
    #[test]
    fn test_get_all_summaries_empty() {
        let (store, _dir) = setup_store();
        let result = store.get_all_summaries("summary").unwrap();
        assert!(result.is_empty());
    }
    // get_all_summaries returns every row for the requested purpose.
    #[test]
    fn test_get_all_summaries_all() {
        let (store, _dir) = setup_store();
        let summaries = vec![
            (
                "ha".to_string(),
                "sa".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
            (
                "hb".to_string(),
                "sb".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
            (
                "hc".to_string(),
                "sc".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
        ];
        store.upsert_summaries_batch(&summaries).unwrap();
        let all = store.get_all_summaries("summary").unwrap();
        assert_eq!(all.len(), 3);
        assert_eq!(all["ha"], "sa");
        assert_eq!(all["hb"], "sb");
        assert_eq!(all["hc"], "sc");
    }
    // Pruning must not touch summaries whose hashes still have chunks.
    #[test]
    fn test_prune_no_orphans() {
        let (store, _dir) = setup_store();
        let c1 = make_chunk("fn_a", "src/a.rs");
        let c2 = make_chunk("fn_b", "src/b.rs");
        let emb = mock_embedding(1.0);
        store
            .upsert_chunks_batch(&[(c1.clone(), emb.clone()), (c2.clone(), emb)], Some(100))
            .unwrap();
        let summaries = vec![
            (
                c1.content_hash,
                "summary a".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
            (
                c2.content_hash,
                "summary b".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
        ];
        store.upsert_summaries_batch(&summaries).unwrap();
        let pruned = store.prune_orphan_summaries().unwrap();
        assert_eq!(pruned, 0);
        let all = store.get_all_summaries("summary").unwrap();
        assert_eq!(all.len(), 2);
    }
    // Only summaries without a matching chunk hash are removed.
    #[test]
    fn test_prune_removes_orphans() {
        let (store, _dir) = setup_store();
        let c1 = make_chunk("fn_a", "src/a.rs");
        let emb = mock_embedding(1.0);
        store
            .upsert_chunks_batch(&[(c1.clone(), emb)], Some(100))
            .unwrap();
        let summaries = vec![
            (
                c1.content_hash.clone(),
                "matching".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
            (
                "orphan_hash_1".to_string(),
                "orphan 1".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
            (
                "orphan_hash_2".to_string(),
                "orphan 2".to_string(),
                "m".to_string(),
                "summary".to_string(),
            ),
        ];
        store.upsert_summaries_batch(&summaries).unwrap();
        assert_eq!(store.get_all_summaries("summary").unwrap().len(), 3);
        let pruned = store.prune_orphan_summaries().unwrap();
        assert_eq!(pruned, 2);
        let remaining = store.get_all_summaries("summary").unwrap();
        assert_eq!(remaining.len(), 1);
        assert!(remaining.contains_key(&c1.content_hash));
    }
    // The same content hash can carry one summary per distinct purpose.
    #[test]
    fn test_summaries_different_purposes_coexist() {
        let (store, _dir) = setup_store();
        let summaries = vec![
            (
                "shared_hash".to_string(),
                "This function parses config files.".to_string(),
                "model-1".to_string(),
                "summary".to_string(),
            ),
            (
                "shared_hash".to_string(),
                "/// Parses configuration from TOML files.\n/// Returns a Config struct."
                    .to_string(),
                "model-1".to_string(),
                "doc-comment".to_string(),
            ),
        ];
        store.upsert_summaries_batch(&summaries).unwrap();
        let by_summary = store
            .get_summaries_by_hashes(&["shared_hash"], "summary")
            .unwrap();
        assert_eq!(by_summary.len(), 1);
        assert_eq!(
            by_summary["shared_hash"],
            "This function parses config files."
        );
        let by_doc = store
            .get_summaries_by_hashes(&["shared_hash"], "doc-comment")
            .unwrap();
        assert_eq!(by_doc.len(), 1);
        assert!(by_doc["shared_hash"].contains("Parses configuration"));
        let all_summary = store.get_all_summaries("summary").unwrap();
        assert_eq!(all_summary.len(), 1);
        let all_doc = store.get_all_summaries("doc-comment").unwrap();
        assert_eq!(all_doc.len(), 1);
    }
    // Of three stored chunks, the one missing from live_ids is removed.
    #[test]
    fn delete_phantom_chunks_removes_stale() {
        let (store, _dir) = setup_store();
        let emb = mock_embedding(1.0);
        let c1 = make_chunk("a", "file.rs");
        let c2 = make_chunk("b", "file.rs");
        let c3 = make_chunk("c", "file.rs");
        let id1 = c1.id.clone();
        let id2 = c2.id.clone();
        store
            .upsert_chunks_batch(
                &[(c1, emb.clone()), (c2, emb.clone()), (c3, emb.clone())],
                Some(100),
            )
            .unwrap();
        let live: Vec<&str> = vec![id1.as_str(), id2.as_str()];
        let deleted = store
            .delete_phantom_chunks(std::path::Path::new("file.rs"), &live)
            .unwrap();
        assert_eq!(deleted, 1, "Should delete one phantom chunk");
        assert_eq!(store.chunk_count().unwrap(), 2);
    }
    // Empty live_ids falls back to delete_by_origin for the file.
    #[test]
    fn delete_phantom_chunks_empty_live_ids_deletes_all() {
        let (store, _dir) = setup_store();
        let emb = mock_embedding(1.0);
        let c1 = make_chunk("a", "file.rs");
        let c2 = make_chunk("b", "file.rs");
        store
            .upsert_chunks_batch(&[(c1, emb.clone()), (c2, emb.clone())], Some(100))
            .unwrap();
        let deleted = store
            .delete_phantom_chunks(std::path::Path::new("file.rs"), &[])
            .unwrap();
        assert_eq!(
            deleted, 2,
            "Empty live_ids should delete all chunks for file"
        );
    }
    // When every stored id is live, nothing is deleted.
    #[test]
    fn delete_phantom_chunks_no_phantoms() {
        let (store, _dir) = setup_store();
        let emb = mock_embedding(1.0);
        let c1 = make_chunk("a", "file.rs");
        let id1 = c1.id.clone();
        store.upsert_chunks_batch(&[(c1, emb)], Some(100)).unwrap();
        let deleted = store
            .delete_phantom_chunks(std::path::Path::new("file.rs"), &[id1.as_str()])
            .unwrap();
        assert_eq!(deleted, 0, "No phantoms to delete");
    }
    // Deletion is scoped by origin: other files' chunks are untouched.
    #[test]
    fn delete_phantom_chunks_wrong_file_unaffected() {
        let (store, _dir) = setup_store();
        let emb = mock_embedding(1.0);
        let c1 = make_chunk("a", "file1.rs");
        let c2 = make_chunk("b", "file2.rs");
        store
            .upsert_chunks_batch(&[(c1, emb.clone()), (c2, emb)], Some(100))
            .unwrap();
        let deleted = store
            .delete_phantom_chunks(std::path::Path::new("file1.rs"), &[])
            .unwrap();
        assert_eq!(deleted, 1, "Should only delete file1.rs chunks");
        assert_eq!(
            store.chunk_count().unwrap(),
            1,
            "file2.rs chunk should remain"
        );
    }
}