use std::path::PathBuf;
use std::sync::Arc;
use trusty_common::migrations::{
file_stamp::{read_version_from_file, write_version_to_file},
MigrationRunner, SchemaVersion,
};
use crate::core::{
corpus::CorpusStore,
embed::Embedder,
indexer::{migrations::JsonCorpusToRedbMigration, CodeIndexer},
store::{UsearchStore, VectorStore},
};
use crate::service::persistence::{self, PersistedIndex};
/// Builds a [`CodeIndexer`] for `index_id` rooted at `root_path`.
///
/// The id and root path are wrapped in a [`PersistedIndex`] whose remaining
/// fields take their `Default` values, and the actual construction is
/// delegated to [`build_indexer_from_entry`].
///
/// * `index_id` - identifier copied into the entry's `id` field.
/// * `root_path` - root directory, moved into the entry.
/// * `embedder` - shared embedder handed on to the indexer.
pub async fn build_indexer_with_persisted_state(
    index_id: &str,
    root_path: PathBuf,
    embedder: &Arc<dyn Embedder>,
) -> CodeIndexer {
    let entry = PersistedIndex {
        id: index_id.to_string(),
        // `root_path` is owned and not used again here, so it is moved into
        // the entry instead of being cloned.
        root_path,
        ..Default::default()
    };
    build_indexer_from_entry(&entry, embedder).await
}
/// Assembles a [`CodeIndexer`] from a persisted index entry.
///
/// Steps, in order:
/// 1. Obtain a vector store from `build_store_for_entry`, using the
///    embedder's dimension.
/// 2. Create the indexer for the entry's id and root path and wire in the
///    embedder and the store.
/// 3. Resolve the redb corpus path and try to open it; on success the corpus
///    store is attached to the indexer. Either failure is logged as a warning
///    and the indexer is left without a corpus store.
/// 4. Run `restore_corpus_for_entry` on the indexer.
///
/// This function never returns an error; failures in steps 3 and 4 are only
/// logged.
pub async fn build_indexer_from_entry(
    entry: &PersistedIndex,
    embedder: &Arc<dyn Embedder>,
) -> CodeIndexer {
    let index_id = &entry.id;

    let store: Arc<dyn VectorStore> = build_store_for_entry(entry, embedder.dimension()).await;
    let mut indexer = CodeIndexer::new(index_id, entry.root_path.clone())
        .with_components(Arc::clone(embedder), store);

    let corpus_location = persistence::corpus_redb_path_for_entry(entry);
    match corpus_location {
        Err(e) => tracing::warn!("cannot resolve redb corpus path for '{index_id}': {e}"),
        Ok(redb_path) => match CorpusStore::open(&redb_path) {
            Err(e) => tracing::warn!(
                "warm-boot: could not open redb corpus for '{index_id}' at {} ({e}) — \
                 running without durable corpus store",
                redb_path.display()
            ),
            Ok(corpus) => indexer.set_corpus_store(Arc::new(corpus)),
        },
    }

    restore_corpus_for_entry(&mut indexer, entry).await;
    indexer
}
/// Restores the indexer's chunks during warm boot.
///
/// `load_chunks_from_redb` is tried first:
/// * more than zero chunks loaded — the restore is logged, the schema stamp
///   is bumped if needed via `stamp_if_unversioned_for_entry`, and nothing
///   else is done;
/// * zero chunks loaded, or the load failed (the failure is logged as a
///   warning) — `run_migrations_for_entry` is invoked instead.
async fn restore_corpus_for_entry(indexer: &mut CodeIndexer, entry: &PersistedIndex) {
    let index_id = &entry.id;

    // Collapse the three load outcomes into "restored n chunks" or "nothing restored".
    let restored = match indexer.load_chunks_from_redb().await {
        Ok(n) if n > 0 => Some(n),
        Ok(_) => None,
        Err(e) => {
            tracing::warn!(
                "warm-boot: redb corpus load failed for '{index_id}' ({e}) — \
                 trying registered migrations"
            );
            None
        }
    };

    if let Some(n) = restored {
        tracing::info!("warm-boot: restored {n} chunks for index '{index_id}' from redb");
        stamp_if_unversioned_for_entry(entry);
    } else {
        run_migrations_for_entry(indexer, entry);
    }
}
/// Runs the registered migrations (currently only `JsonCorpusToRedbMigration`)
/// against `indexer`.
///
/// The starting version is read from the entry's schema stamp file; if that
/// read fails, the failure is logged and `SchemaVersion::UNVERSIONED` is used.
/// The runner is given a callback that writes a version to the same stamp
/// file. If the stamp path cannot be resolved, the function logs a warning and
/// does nothing. A runner failure is logged and otherwise ignored.
fn run_migrations_for_entry(indexer: &mut CodeIndexer, entry: &PersistedIndex) {
    let index_id = &entry.id;

    let stamp_path = match persistence::schema_version_path_for_entry(entry) {
        Err(e) => {
            tracing::warn!("cannot resolve schema version path for '{index_id}': {e}");
            return;
        }
        Ok(resolved) => resolved,
    };

    let current = read_version_from_file(&stamp_path).unwrap_or_else(|e| {
        tracing::warn!(
            "warm-boot: failed to read schema stamp at {} ({e}) — \
             treating as UNVERSIONED",
            stamp_path.display()
        );
        SchemaVersion::UNVERSIONED
    });

    let outcome = MigrationRunner::new(vec![Box::new(JsonCorpusToRedbMigration)])
        .run(indexer, current, |v| write_version_to_file(&stamp_path, v));
    if let Err(e) = outcome {
        tracing::warn!(
            "warm-boot: migration runner failed for '{index_id}' ({e}) — \
             starting with whatever state was restored"
        );
    }
}
/// Ensures the entry's schema stamp file is at least
/// `TRUSTY_SEARCH_SCHEMA_TARGET`.
///
/// If the stamp path cannot be resolved, a warning is logged and nothing is
/// written. If the stamp already holds a version greater than or equal to the
/// target, nothing is written. Otherwise the target version is written; a
/// write failure is logged and otherwise ignored.
///
/// A failure to read the existing stamp is logged and treated as
/// `SchemaVersion::UNVERSIONED`, matching how `run_migrations_for_entry`
/// handles the same read.
/// NOTE(review): this assumes `read_version_from_file` does not report a
/// missing stamp file as an error — confirm, otherwise the warning is noisy.
fn stamp_if_unversioned_for_entry(entry: &PersistedIndex) {
    use crate::core::indexer::migrations::TRUSTY_SEARCH_SCHEMA_TARGET;

    let index_id = &entry.id;
    let stamp_path = match persistence::schema_version_path_for_entry(entry) {
        Ok(p) => p,
        Err(e) => {
            tracing::warn!("cannot resolve schema version path for '{index_id}': {e}");
            return;
        }
    };

    // Surface read errors instead of silently discarding them; the fallback
    // value is unchanged.
    let current = match read_version_from_file(&stamp_path) {
        Ok(v) => v,
        Err(e) => {
            tracing::warn!(
                "warm-boot: failed to read schema stamp at {} ({e}) — \
                 treating as UNVERSIONED",
                stamp_path.display()
            );
            SchemaVersion::UNVERSIONED
        }
    };

    // Already at (or beyond) the target: leave the stamp untouched.
    if current >= TRUSTY_SEARCH_SCHEMA_TARGET {
        return;
    }

    if let Err(e) = write_version_to_file(&stamp_path, TRUSTY_SEARCH_SCHEMA_TARGET) {
        tracing::warn!(
            "warm-boot: failed to bump schema stamp for '{index_id}' at {} ({e})",
            stamp_path.display()
        );
    }
}
/// Produces the vector store for an index entry, preferring a persisted HNSW
/// snapshot.
///
/// A store from `fresh_store(dim)` is returned when:
/// * the snapshot path cannot be resolved (logged),
/// * `has_persisted_hnsw` reports no snapshot at that path,
/// * loading yields `Ok(None)` or an error (logged), or
/// * the loaded store's dimension differs from `dim` (logged).
///
/// Only a successfully loaded snapshot whose dimension equals `dim` is
/// returned as-is.
async fn build_store_for_entry(entry: &PersistedIndex, dim: usize) -> Arc<dyn VectorStore> {
    let index_id = &entry.id;

    let path = match persistence::hnsw_path_for_entry(entry) {
        Err(e) => {
            tracing::warn!("cannot resolve hnsw path for '{index_id}': {e}");
            return fresh_store(dim);
        }
        Ok(resolved) => resolved,
    };

    // Nothing on disk to restore from.
    if !persistence::has_persisted_hnsw(&path) {
        return fresh_store(dim);
    }

    match UsearchStore::load_from(&path).await {
        Ok(Some(store)) if store.dim() == dim => {
            tracing::info!(
                "warm-boot: restored HNSW snapshot for '{}' from {}",
                index_id,
                path.display()
            );
            return Arc::new(store);
        }
        // Snapshot loaded, but it was built for a different embedding dimension.
        Ok(Some(store)) => tracing::warn!(
            "warm-boot: hnsw snapshot for '{}' has dim {} but embedder is {} — starting fresh",
            index_id,
            store.dim(),
            dim
        ),
        Ok(None) => tracing::warn!(
            "warm-boot: hnsw snapshot at {} could not be loaded — starting fresh",
            path.display()
        ),
        Err(e) => tracing::warn!(
            "warm-boot: error loading hnsw snapshot at {}: {e} — starting fresh",
            path.display()
        ),
    }

    fresh_store(dim)
}
/// Creates an empty [`UsearchStore`] of dimension `dim` behind an
/// `Arc<dyn VectorStore>`.
///
/// # Panics
///
/// Panics (after logging at error level) if `UsearchStore::new` fails.
fn fresh_store(dim: usize) -> Arc<dyn VectorStore> {
    let store = match UsearchStore::new(dim) {
        Ok(created) => created,
        Err(e) => {
            tracing::error!(
                "failed to allocate UsearchStore (dim={dim}): {e} — daemon cannot continue"
            );
            panic!("usearch alloc failure (OOM during HNSW init, dim={dim}): {e}");
        }
    };
    // Coerced to the trait object by the return type.
    Arc::new(store)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::chunker::{ChunkType, RawChunk};
    use crate::service::colocated_storage;
    use crate::service::persistence::PersistedIndex;
    use tempfile::tempdir;
    use trusty_common::embedder::MockEmbedder;

    /// Shared embedder built from `MockEmbedder::new(8)`.
    fn mock_embedder() -> Arc<dyn crate::core::embed::Embedder> {
        Arc::new(MockEmbedder::new(8))
    }

    /// A small Rust code chunk with the given id and otherwise fixed contents.
    fn minimal_raw_chunk(id: &str) -> RawChunk {
        RawChunk {
            id: id.to_string(),
            file: "src/lib.rs".to_string(),
            start_line: 1,
            end_line: 5,
            content: "fn hello() {}".to_string(),
            function_name: None,
            language: Some("rust".to_string()),
            chunk_type: ChunkType::Code,
            calls: Vec::new(),
            inherits_from: Vec::new(),
            chunk_depth: 0,
            parent_chunk_id: None,
            child_chunk_ids: Vec::new(),
            nlp_keywords: Vec::new(),
            nlp_code_refs: Vec::new(),
            virtual_terms: Vec::new(),
        }
    }

    /// Entry with the given id, root and `colocated` flag; every other field
    /// is left at its default.
    fn entry_for(id: &str, root_path: PathBuf, colocated: bool) -> PersistedIndex {
        PersistedIndex {
            id: id.to_string(),
            root_path,
            colocated,
            ..Default::default()
        }
    }

    /// A chunk written through the first indexer's corpus store must be
    /// visible after a second indexer is built from the same entry.
    /// (The "#483" tag in the messages is presumably an issue number.)
    #[tokio::test]
    async fn colocated_create_handler_path_survives_simulated_reload() {
        let tmp = tempdir().unwrap();
        let root = tmp.path().to_path_buf();
        let embedder = mock_embedder();
        let entry = entry_for("test-idx-483", root.clone(), true);

        let indexer = build_indexer_from_entry(&entry, &embedder).await;
        assert!(
            indexer.has_corpus_store(),
            "#483: indexer must have a corpus store after build_indexer_from_entry with colocated=true"
        );
        assert!(
            colocated_storage::has_colocated_storage(&root),
            "#483: .trusty-search/ must exist after build_indexer_from_entry with colocated=true"
        );

        // The corpus handle is a statement-scoped temporary, so it is gone
        // before the indexer itself is dropped.
        indexer
            .corpus_store()
            .expect("corpus store must be set")
            .upsert_chunks(&[minimal_raw_chunk("src/lib.rs:1:5")])
            .expect("upsert must succeed");
        drop(indexer);

        // Simulated reload: build again from the very same entry.
        let reloaded = build_indexer_from_entry(&entry, &embedder).await;
        assert!(
            reloaded.has_corpus_store(),
            "#483: reloaded indexer must have a corpus store"
        );
        let chunk_count = reloaded
            .corpus_store()
            .expect("corpus store must be set")
            .chunk_count()
            .expect("chunk_count must succeed");
        assert!(
            chunk_count > 0,
            "#483: reloaded index must contain the written chunk (got {chunk_count}); \
             writer/loader paths must agree on the colocated location"
        );
    }

    /// A colocated entry must yield an indexer with a corpus store located
    /// under the project root.
    /// (The "#485" tag in the messages is presumably an issue number.)
    #[tokio::test]
    async fn colocated_create_path_wires_corpus_store_for_schema_version() {
        let tmp = tempdir().unwrap();
        let root = tmp.path().to_path_buf();
        let embedder = mock_embedder();
        let entry = entry_for("test-idx-485", root.clone(), true);

        let indexer = build_indexer_from_entry(&entry, &embedder).await;
        assert!(
            indexer.has_corpus_store(),
            "#485: indexer built via colocated create path must have corpus store; \
             without it write_schema_version returns 'no durable corpus'"
        );

        let located = indexer
            .corpus_store()
            .map(|corpus| corpus.path().to_path_buf());
        if let Some(corpus_path) = located {
            assert!(
                corpus_path.starts_with(&root),
                "#485: corpus store must be inside the project root (colocated); \
                 got {corpus_path:?}"
            );
        }
    }

    /// Building from a non-colocated entry must complete without panicking.
    #[tokio::test]
    async fn legacy_non_colocated_path_does_not_panic() {
        let tmp = tempdir().unwrap();
        let root = tmp.path().to_path_buf();
        let embedder = mock_embedder();
        let entry = entry_for("test-idx-legacy", root.clone(), false);

        let _indexer = build_indexer_from_entry(&entry, &embedder).await;
    }
}