use std::path::PathBuf;
use std::sync::Arc;
use dashmap::DashMap;
use crate::core::registry::IndexHandle;
/// Warms the in-memory hash cache `map` from the file hashes persisted in the
/// corpus store attached to `handle`'s indexer.
///
/// Returns without touching `map` when the indexer has no corpus store. A failed
/// load (either the store call returning an error or the blocking task failing to
/// join) is logged as a warning and likewise leaves `map` untouched.
///
/// For each persisted `(path, hash)` pair the map is only written when the path is
/// absent or currently maps to a different hash.
pub(super) async fn load_into_cache(handle: &IndexHandle, map: &Arc<DashMap<PathBuf, String>>) {
    // Fetch the store in a single statement so the indexer read guard is released
    // before the blocking load is scheduled.
    let store = handle.indexer.read().await.corpus_store();
    let Some(store) = store else {
        return;
    };

    // `load_file_hashes` is a synchronous call, so it is run via `spawn_blocking`
    // rather than directly on the async task.
    let persisted = match tokio::task::spawn_blocking(move || store.load_file_hashes()).await {
        Ok(Ok(persisted)) => persisted,
        Ok(Err(e)) => {
            tracing::warn!("reindex: could not load persisted file hashes ({e}) — cold start");
            return;
        }
        Err(e) => {
            tracing::warn!("reindex: file-hash load task panicked ({e}) — cold start");
            return;
        }
    };

    // Captured up front because the loop below consumes `persisted`.
    let total = persisted.len();
    for (path_str, hash) in persisted {
        let key = PathBuf::from(&path_str);
        // The lookup guard is a temporary of this statement, so it is gone before
        // the insert below runs.
        let stale_or_missing = map
            .get(&key)
            .map_or(true, |existing| existing.value() != &hash);
        if stale_or_missing {
            map.insert(key, hash);
        }
    }

    // The reported number is how many entries were read from the store, not how
    // many were actually written into `map`.
    if total > 0 {
        tracing::info!(
            "reindex: loaded {} persisted file hashes from redb (warm skip-cache)",
            total
        );
    }
}
/// Writes a batch of freshly computed `(path, hash)` pairs to the corpus store
/// attached to `handle`'s indexer.
///
/// Nothing is written when:
/// - `new_hashes` is empty,
/// - `current_map_len` is strictly greater than `max_entries` (a debug line is
///   logged in that case), or
/// - the indexer has no corpus store.
///
/// Paths are converted with `to_string_lossy`, so any non-UTF-8 component is stored
/// in its lossy form. Store errors and join failures are logged as warnings and
/// are not propagated to the caller.
pub(super) async fn persist_batch(
    handle: &IndexHandle,
    new_hashes: &[(PathBuf, String)],
    max_entries: usize,
    current_map_len: usize,
) {
    if new_hashes.is_empty() {
        return;
    }

    let over_cap = current_map_len > max_entries;
    if over_cap {
        tracing::debug!(
            "reindex: skipping hash persistence — cache over cap ({} > {})",
            current_map_len,
            max_entries
        );
        return;
    }

    // Fetch the store in a single statement so the indexer read guard is released
    // before the blocking write is scheduled.
    let store = handle.indexer.read().await.corpus_store();
    let Some(store) = store else {
        return;
    };

    // The blocking closure must own its data, so copy the borrowed batch into
    // owned strings first.
    let mut owned: Vec<(String, String)> = Vec::with_capacity(new_hashes.len());
    for (path, hash) in new_hashes {
        owned.push((path.to_string_lossy().into_owned(), hash.clone()));
    }

    let write = move || {
        // The store is handed `&str` pairs; borrow them from the owned copies
        // that were moved into this closure.
        let borrowed: Vec<(&str, &str)> = owned
            .iter()
            .map(|(path, hash)| (path.as_str(), hash.as_str()))
            .collect();
        store.upsert_file_hashes(&borrowed)
    };

    match tokio::task::spawn_blocking(write).await {
        Ok(Ok(())) => {}
        Ok(Err(e)) => {
            tracing::warn!("reindex: could not persist file hashes to redb ({e})");
        }
        Err(e) => {
            tracing::warn!("reindex: file-hash persist task panicked ({e})");
        }
    }
}
/// Removes every persisted file hash from the corpus store attached to `handle`'s
/// indexer.
///
/// Does nothing when the indexer has no corpus store. Success is logged at debug
/// level; a store error or a join failure is logged as a warning and is not
/// propagated to the caller.
pub(super) async fn clear_persisted(handle: &IndexHandle) {
    // Fetch the store in a single statement so the indexer read guard is released
    // before the blocking call is scheduled.
    let store = handle.indexer.read().await.corpus_store();
    let Some(store) = store else {
        return;
    };

    // First layer: did the blocking task itself run to completion?
    let joined = tokio::task::spawn_blocking(move || store.clear_file_hashes()).await;
    let outcome = match joined {
        Ok(outcome) => outcome,
        Err(e) => {
            tracing::warn!("reindex: file-hash clear task panicked ({e})");
            return;
        }
    };

    // Second layer: did the store accept the clear?
    match outcome {
        Ok(()) => {
            tracing::debug!("reindex: cleared persisted file hashes from redb");
        }
        Err(e) => {
            tracing::warn!("reindex: could not clear persisted file hashes ({e})");
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::corpus::CorpusStore;
    use crate::core::indexer::CodeIndexer;
    use crate::core::registry::{IndexHandle, IndexId, IndexStages};
    use std::sync::Arc;
    use tokio::sync::RwLock;

    /// Builds an `IndexHandle` whose indexer has a corpus store opened at
    /// `index.redb` inside `dir`. All remaining fields are set to empty / disabled
    /// values so only the hash-cache code paths are exercised.
    fn make_handle_with_corpus(dir: &tempfile::TempDir) -> IndexHandle {
        let root = dir.path().to_path_buf();
        let db_path = root.join("index.redb");
        let corpus = Arc::new(CorpusStore::open(&db_path).expect("open test corpus"));
        let mut indexer = CodeIndexer::new("hash-cache-test", root.clone());
        indexer.set_corpus_store(corpus);
        IndexHandle {
            id: IndexId::new("hash-cache-test"),
            indexer: Arc::new(RwLock::new(indexer)),
            root_path: root,
            include_paths: vec![],
            exclude_globs: vec![],
            extensions: vec![],
            domain_terms: vec![],
            include_docs: false,
            respect_gitignore: true,
            path_filter: vec![],
            context_embedding: Arc::new(RwLock::new(None)),
            context_summary: Arc::new(RwLock::new(None)),
            indexed_head_sha: Arc::new(RwLock::new(None)),
            lexical_only: false,
            skip_kg: false,
            stages: Arc::new(RwLock::new(IndexStages::default())),
            search_pressure: Arc::new(tokio::sync::Notify::new()),
            walk_diagnostics: Arc::new(RwLock::new(
                crate::core::registry::WalkDiagnostics::default(),
            )),
        }
    }

    /// Hashes written directly to the store show up in the map after a load.
    #[tokio::test]
    async fn load_into_cache_populates_map() {
        let dir = tempfile::tempdir().unwrap();
        let handle = make_handle_with_corpus(&dir);
        {
            // Seed the store directly, bypassing `persist_batch`.
            let indexer = handle.indexer.read().await;
            let corpus = indexer.corpus_store().unwrap();
            corpus
                .upsert_file_hashes(&[("src/a.rs", "aaa"), ("src/b.rs", "bbb")])
                .unwrap();
        }
        let map: Arc<DashMap<PathBuf, String>> = Arc::new(DashMap::new());
        load_into_cache(&handle, &map).await;
        assert_eq!(map.len(), 2);
        assert_eq!(
            map.get(&PathBuf::from("src/a.rs"))
                .map(|v| v.value().clone())
                .unwrap(),
            "aaa"
        );
        assert_eq!(
            map.get(&PathBuf::from("src/b.rs"))
                .map(|v| v.value().clone())
                .unwrap(),
            "bbb"
        );
    }

    /// A batch under the cap is written to the store in full.
    #[tokio::test]
    async fn persist_batch_writes_to_store() {
        let dir = tempfile::tempdir().unwrap();
        let handle = make_handle_with_corpus(&dir);
        let new_hashes = vec![
            (PathBuf::from("src/a.rs"), "aaa".to_string()),
            (PathBuf::from("src/b.rs"), "bbb".to_string()),
        ];
        persist_batch(&handle, &new_hashes, 200_000, 2).await;
        let corpus = handle.indexer.read().await.corpus_store().unwrap();
        let mut loaded = corpus.load_file_hashes().unwrap();
        // Sort by path so the assertions do not depend on the store's iteration order.
        loaded.sort_by(|x, y| x.0.cmp(&y.0));
        assert_eq!(loaded.len(), 2);
        // Check both rows, not just the first, so a corrupted second entry is caught.
        assert_eq!(loaded[0], ("src/a.rs".to_string(), "aaa".to_string()));
        assert_eq!(loaded[1], ("src/b.rs".to_string(), "bbb".to_string()));
    }

    /// Clearing removes hashes that were previously written to the store.
    #[tokio::test]
    async fn clear_persisted_hashes_empties_store() {
        let dir = tempfile::tempdir().unwrap();
        let handle = make_handle_with_corpus(&dir);
        {
            let indexer = handle.indexer.read().await;
            let corpus = indexer.corpus_store().unwrap();
            corpus.upsert_file_hashes(&[("src/a.rs", "aaa")]).unwrap();
        }
        clear_persisted(&handle).await;
        let corpus = handle.indexer.read().await.corpus_store().unwrap();
        assert!(corpus.load_file_hashes().unwrap().is_empty());
    }

    /// When the reported map length exceeds the cap, nothing is persisted.
    #[tokio::test]
    async fn persist_batch_skips_when_over_cap() {
        let dir = tempfile::tempdir().unwrap();
        let handle = make_handle_with_corpus(&dir);
        let new_hashes = vec![(PathBuf::from("src/a.rs"), "aaa".to_string())];
        persist_batch(&handle, &new_hashes, 5, 6).await;
        let corpus = handle.indexer.read().await.corpus_store().unwrap();
        assert!(
            corpus.load_file_hashes().unwrap().is_empty(),
            "over-cap persist must not write anything"
        );
    }

    /// The cap check in `persist_batch` is a strict `>`, so a map length exactly
    /// equal to the cap must still persist the batch.
    #[tokio::test]
    async fn persist_batch_writes_at_exact_cap() {
        let dir = tempfile::tempdir().unwrap();
        let handle = make_handle_with_corpus(&dir);
        let new_hashes = vec![(PathBuf::from("src/a.rs"), "aaa".to_string())];
        persist_batch(&handle, &new_hashes, 5, 5).await;
        let corpus = handle.indexer.read().await.corpus_store().unwrap();
        let loaded = corpus.load_file_hashes().unwrap();
        assert_eq!(loaded.len(), 1, "at-cap persist must still write");
        assert_eq!(loaded[0], ("src/a.rs".to_string(), "aaa".to_string()));
    }

    /// Without a corpus store, loading leaves the map untouched.
    #[tokio::test]
    async fn load_into_cache_no_corpus_is_noop() {
        let indexer = CodeIndexer::new("no-corpus", "/tmp/no-corpus");
        let handle = IndexHandle::bare(
            IndexId::new("no-corpus"),
            Arc::new(RwLock::new(indexer)),
            PathBuf::from("/tmp/no-corpus"),
        );
        let map: Arc<DashMap<PathBuf, String>> = Arc::new(DashMap::new());
        load_into_cache(&handle, &map).await;
        assert!(map.is_empty());
    }
}