use super::*;
use crate::core::embed::MockEmbedder;
use crate::core::store::UsearchStore;
use std::sync::atomic::Ordering;
/// Builds a minimal `RawChunk` fixture for the tests in this module.
///
/// The chunk starts at line 1 and ends at `1 + <number of lines in content>`.
/// It is tagged as Rust code with `ChunkType::Code`, has no function name or
/// parent, and every relationship / NLP list is left empty.
fn raw(id: &str, file: &str, content: &str) -> RawChunk {
    let line_count = content.lines().count();
    RawChunk {
        // Identity and payload.
        id: id.to_owned(),
        file: file.to_owned(),
        content: content.to_owned(),
        // Position within the (pretend) file.
        start_line: 1,
        end_line: 1 + line_count,
        // Classification.
        language: Some(String::from("rust")),
        chunk_type: crate::core::chunker::ChunkType::Code,
        function_name: None,
        // Hierarchy: a top-level chunk with no parent and no children.
        chunk_depth: 0,
        parent_chunk_id: None,
        child_chunk_ids: vec![],
        // Graph edges and derived terms start out empty.
        calls: vec![],
        inherits_from: vec![],
        nlp_keywords: vec![],
        nlp_code_refs: vec![],
        virtual_terms: vec![],
    }
}
fn raw_with_kind(
id: &str,
file: &str,
content: &str,
chunk_type: crate::core::chunker::ChunkType,
function_name: Option<&str>,
) -> RawChunk {
let mut c = raw(id, file, content);
c.chunk_type = chunk_type;
c.function_name = function_name.map(|s| s.to_string());
c
}
/// Creates a `CodeIndexer` named "test" wired to a `MockEmbedder` and a
/// `UsearchStore`, both constructed with the same size argument (32).
///
/// Panics with "usearch new" if the store cannot be created.
fn make_indexer() -> CodeIndexer {
    let dimensions = 32;
    let embedder = Arc::new(MockEmbedder::new(dimensions)) as Arc<dyn Embedder>;
    let store =
        Arc::new(UsearchStore::new(dimensions).expect("usearch new")) as Arc<dyn VectorStore>;
    CodeIndexer::new("test", "/tmp/test").with_components(embedder, store)
}
/// Saves two chunks to disk from one indexer, loads them into a fresh one,
/// and checks both the chunk count and that a BM25 query finds chunk "a".
#[tokio::test]
async fn test_save_chunks_roundtrip() {
    let tmp = tempfile::tempdir().unwrap();
    let snapshot = tmp.path().join("chunks.json");

    // Populate the source indexer and write its chunks out.
    let idx = make_indexer();
    for (id, file, body) in [
        ("a", "src/a.rs", "fn authenticate() {}"),
        ("b", "src/b.rs", "fn verify_token() {}"),
    ] {
        idx.add_chunk(raw(id, file, body)).await.unwrap();
    }
    idx.save_chunks_to_disk(&snapshot)
        .await
        .expect("save chunks");
    assert!(snapshot.exists());

    // A brand-new indexer must pick both chunks back up.
    let restored = make_indexer();
    let loaded = restored
        .load_chunks_from_disk(&snapshot)
        .await
        .expect("load chunks");
    assert_eq!(loaded, 2);
    assert_eq!(restored.chunk_count(), 2);

    // Release the BM25 read lock before asserting on the hits.
    let hits = {
        let bm25 = restored.bm25.read().await;
        bm25.score_query_all("authenticate", 5)
    };
    assert!(
        hits.iter().any(|(id, _)| id == "a"),
        "BM25 not rebuilt from restored chunks: {:?}",
        hits
    );
}
/// Loading from a path that does not exist must succeed and report 0 chunks.
#[tokio::test]
async fn test_load_chunks_missing_file_returns_zero() {
    let tmp = tempfile::tempdir().unwrap();
    // Nothing is ever written to this path.
    let missing = tmp.path().join("nope.json");

    let idx = make_indexer();
    let loaded = idx.load_chunks_from_disk(&missing).await.unwrap();
    assert_eq!(loaded, 0);
}
/// Fires 64 forced persist requests back-to-back and waits (up to 15 s) for
/// the persist state to become idle, i.e. `in_flight` and `dirty` both false.
#[tokio::test]
async fn test_persist_coalesces_concurrent_calls() {
    use std::time::{Duration, Instant};

    let idx = make_indexer();
    idx.add_chunk(raw("a", "a.rs", "fn a() {}")).await.unwrap();

    for _request in 0..64 {
        idx.spawn_incremental_persist(true);
    }

    // Poll every 25 ms until both flags clear or the deadline passes.
    let deadline = Instant::now() + Duration::from_secs(15);
    loop {
        let in_flight = idx.persist_state.in_flight.load(Ordering::Acquire);
        let dirty = idx.persist_state.dirty.load(Ordering::Acquire);
        if !(in_flight || dirty) {
            break;
        }
        assert!(
            Instant::now() < deadline,
            "persist coalescing loop did not drain within 15s: \
             in_flight={in_flight}, dirty={dirty}"
        );
        tokio::time::sleep(Duration::from_millis(25)).await;
    }

    // One more request after the drain. The flag read below is discarded, so
    // this tail only checks that the calls do not panic.
    // NOTE(review): nothing here asserts that calls were actually coalesced —
    // confirm whether a stronger assertion is intended.
    idx.persist_state.dirty.store(false, Ordering::Release);
    idx.spawn_incremental_persist(true);
    let _ = idx.persist_state.in_flight.load(Ordering::Acquire);
}
/// Checks the batch-counter bookkeeping of incremental persistence: each
/// non-forced call bumps the counter by one, while
/// `force_incremental_persist` leaves it untouched.
#[tokio::test]
async fn test_incremental_persist_throttles_to_interval() {
    use std::time::{Duration, Instant};

    let idx = make_indexer();
    // Shorthand for reading the current batch counter.
    let batches = || idx.persist_state.batch_counter.load(Ordering::Acquire);

    assert_eq!(batches(), 0);

    for _ in 0..HNSW_SNAPSHOT_BATCH_INTERVAL {
        idx.spawn_incremental_persist(false);
    }
    assert_eq!(
        batches(),
        HNSW_SNAPSHOT_BATCH_INTERVAL,
        "every non-forced call must increment the batch counter"
    );

    // One call past the interval still counts.
    idx.spawn_incremental_persist(false);
    assert_eq!(batches(), HNSW_SNAPSHOT_BATCH_INTERVAL + 1);

    let before = batches();
    idx.force_incremental_persist();
    assert_eq!(
        batches(),
        before,
        "force_incremental_persist must not increment the batch counter"
    );

    // Let any background persist work finish before the test returns.
    let deadline = Instant::now() + Duration::from_secs(15);
    loop {
        let busy = idx.persist_state.in_flight.load(Ordering::Acquire)
            || idx.persist_state.dirty.load(Ordering::Acquire);
        if !busy {
            break;
        }
        if Instant::now() >= deadline {
            panic!("persist tasks did not drain within 15s");
        }
        tokio::time::sleep(Duration::from_millis(25)).await;
    }
}
/// End-to-end search over three chunks: the query "fn authenticate" must put
/// the auth chunk first, with a compact snippet and a hybrid/bm25 reason.
#[tokio::test]
async fn test_search_integration_returns_relevant_chunk_first() {
    let idx = make_indexer();
    for (id, file, body) in [
        (
            "src/auth.rs:1:5",
            "src/auth.rs",
            "fn authenticate(user: &str, password: &str) -> bool { true }",
        ),
        (
            "src/render.rs:1:3",
            "src/render.rs",
            "fn render_ui_components() { /* svelte */ }",
        ),
        (
            "src/db.rs:1:4",
            "src/db.rs",
            "struct Database { conn: String }",
        ),
    ] {
        idx.add_chunk(raw(id, file, body)).await.unwrap();
    }

    let query = SearchQuery {
        text: String::from("fn authenticate"),
        top_k: 3,
        expand_graph: false,
        compact: true,
        ..Default::default()
    };
    let results = idx.search(&query).await.expect("search");

    let top = results
        .first()
        .expect("search should return at least one hit");
    assert_eq!(
        top.id,
        "src/auth.rs:1:5",
        "auth chunk must rank first; got {:?}",
        results.iter().map(|r| &r.id).collect::<Vec<_>>()
    );
    assert!(
        top.compact_snippet.is_some(),
        "compact_snippet should be populated when compact=true"
    );
    assert!(
        top.match_reason == "hybrid" || top.match_reason == "bm25",
        "expected hybrid or bm25 match_reason, got {}",
        top.match_reason
    );
}
/// After the first `embed_query`, the vector must be stored in `query_cache`
/// under `hash_query(text)`, and a repeat call must return the same vector.
#[tokio::test]
async fn test_query_cache_skips_embedder_on_repeat() {
    let idx = make_indexer();
    let query_text = "find user authentication logic";

    let first = idx.embed_query(query_text).await.unwrap().unwrap();

    // The guard is a temporary, so the lock is released before the next await.
    let cache_key = hash_query(query_text);
    let cached = idx.query_cache.lock().unwrap().get(&cache_key).cloned();
    assert_eq!(cached.as_ref(), Some(&first), "cache must be populated");

    // NOTE(review): if the mock embedder is deterministic, equality alone does
    // not prove the embedder was skipped on this call — confirm.
    let second = idx.embed_query(query_text).await.unwrap().unwrap();
    assert_eq!(
        first, second,
        "second call must return identical vector via cache"
    );
}
/// An indexer built without `with_components` must still answer queries:
/// the chunk containing "authenticate" has to come back first with
/// `match_reason == "bm25"`.
#[tokio::test]
async fn test_search_with_no_embedder_falls_back_to_bm25() {
    let idx = CodeIndexer::new("bm25-only", "/tmp/test");
    idx.add_chunk(raw("f.rs:1:1", "f.rs", "fn authenticate() {}"))
        .await
        .unwrap();
    idx.add_chunk(raw("g.rs:1:1", "g.rs", "fn unrelated() {}"))
        .await
        .unwrap();
    let q = SearchQuery {
        text: "authenticate".to_string(),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let r = idx.search(&q).await.unwrap();
    // Guard the index: an empty result should fail with a readable message
    // rather than an opaque out-of-bounds panic.
    let top = r
        .first()
        .expect("BM25-only search must return at least one hit");
    assert_eq!(top.id, "f.rs:1:1");
    assert_eq!(top.match_reason, "bm25");
}
/// A chunk removed via `remove_chunk` must no longer appear in search results.
#[tokio::test]
async fn test_remove_chunk_removes_from_results() {
    let idx = make_indexer();
    for (id, file, body) in [
        ("a:1:1", "a.rs", "fn authenticate() {}"),
        ("b:1:1", "b.rs", "fn other_thing() {}"),
    ] {
        idx.add_chunk(raw(id, file, body)).await.unwrap();
    }

    idx.remove_chunk("a:1:1").await.unwrap();

    let query = SearchQuery {
        text: String::from("authenticate"),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let hits = idx.search(&query).await.unwrap();
    // Every surviving hit must be something other than the removed chunk.
    assert!(hits.iter().all(|c| c.id != "a:1:1"));
}
/// With `expand_graph: true`, searching "callers of authenticate" must return
/// `login_handler` (which lists `authenticate` in its `calls`) tagged
/// `"hybrid+kg"`, with a score equal to the `authenticate` chunk's score
/// multiplied by `KG_EXPAND_SCORE_FACTOR`.
#[tokio::test]
async fn test_kg_expansion_marks_neighbours_with_hybrid_kg() {
    let idx = CodeIndexer::new("kg-test", "/tmp/test");
    // The caller: its text never mentions "authenticate"; the link to it is
    // carried only by the `calls` list.
    idx.add_chunk(RawChunk {
        id: "h:1".to_string(),
        file: "h.rs".to_string(),
        start_line: 1,
        end_line: 3,
        content: "fn login_handler() { /* dispatch to verifier */ }".to_string(),
        function_name: Some("login_handler".to_string()),
        language: Some("rust".to_string()),
        chunk_type: crate::core::chunker::ChunkType::Function,
        calls: vec!["authenticate".to_string()],
        inherits_from: Vec::new(),
        chunk_depth: 0,
        parent_chunk_id: None,
        child_chunk_ids: Vec::new(),
        nlp_keywords: Vec::new(),
        nlp_code_refs: Vec::new(),
        virtual_terms: Vec::new(),
    })
    .await
    .unwrap();
    // The callee, which the query text matches directly.
    idx.add_chunk(RawChunk {
        id: "a:1".to_string(),
        file: "a.rs".to_string(),
        start_line: 1,
        end_line: 1,
        content: "fn authenticate() {}".to_string(),
        function_name: Some("authenticate".to_string()),
        language: Some("rust".to_string()),
        chunk_type: crate::core::chunker::ChunkType::Function,
        calls: Vec::new(),
        inherits_from: Vec::new(),
        chunk_depth: 0,
        parent_chunk_id: None,
        child_chunk_ids: Vec::new(),
        nlp_keywords: Vec::new(),
        nlp_code_refs: Vec::new(),
        virtual_terms: Vec::new(),
    })
    .await
    .unwrap();
    let q = SearchQuery {
        text: "callers of authenticate".to_string(),
        top_k: 10,
        expand_graph: true,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&q).await.unwrap();
    let login = results
        .iter()
        .find(|c| c.id == "h:1")
        .expect("login_handler should surface via KG expansion");
    assert_eq!(
        login.match_reason, "hybrid+kg",
        "KG-expanded chunks must carry hybrid+kg marker, got {}",
        login.match_reason
    );
    let trigger = results
        .iter()
        .find(|c| c.id == "a:1")
        .expect("authenticate must appear directly");
    let expected = trigger.score * KG_EXPAND_SCORE_FACTOR;
    // Print the real factor so the message stays truthful if the constant
    // is ever retuned.
    assert!(
        (login.score - expected).abs() < 1e-5,
        "expected KG score = {} * {} = {}, got {}",
        KG_EXPAND_SCORE_FACTOR,
        trigger.score,
        expected,
        login.score
    );
}
#[tokio::test]
async fn test_kg_expansion_disabled_by_expand_graph_false() {
let idx = make_indexer();
idx.add_chunk(RawChunk {
id: "h:1".to_string(),
file: "h.rs".to_string(),
start_line: 1,
end_line: 1,
content: "fn caller() { target(); }".to_string(),
function_name: Some("caller".to_string()),
language: Some("rust".to_string()),
chunk_type: crate::core::chunker::ChunkType::Function,
calls: vec!["target".to_string()],
inherits_from: Vec::new(),
chunk_depth: 0,
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
nlp_keywords: Vec::new(),
nlp_code_refs: Vec::new(),
virtual_terms: Vec::new(),
})
.await
.unwrap();
idx.add_chunk(RawChunk {
id: "t:1".to_string(),
file: "t.rs".to_string(),
start_line: 1,
end_line: 1,
content: "fn target() {}".to_string(),
function_name: Some("target".to_string()),
language: Some("rust".to_string()),
chunk_type: crate::core::chunker::ChunkType::Function,
calls: Vec::new(),
inherits_from: Vec::new(),
chunk_depth: 0,
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
nlp_keywords: Vec::new(),
nlp_code_refs: Vec::new(),
virtual_terms: Vec::new(),
})
.await
.unwrap();
let q = SearchQuery {
text: "callers of target".to_string(),
top_k: 10,
expand_graph: false,
compact: false,
..Default::default()
};
let results = idx.search(&q).await.unwrap();
assert!(
!results.iter().any(|c| c.match_reason.contains("kg")),
"expand_graph=false must suppress KG expansion, got {results:#?}"
);
}
#[tokio::test]
async fn search_semantic_stage_skips_kg_expansion() {
let idx = make_indexer();
idx.add_chunk(RawChunk {
id: "h:1".to_string(),
file: "h.rs".to_string(),
start_line: 1,
end_line: 1,
content: "fn caller() { /* dispatch */ }".to_string(),
function_name: Some("caller".to_string()),
language: Some("rust".to_string()),
chunk_type: crate::core::chunker::ChunkType::Function,
calls: vec!["target".to_string()],
inherits_from: Vec::new(),
chunk_depth: 0,
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
nlp_keywords: Vec::new(),
nlp_code_refs: Vec::new(),
virtual_terms: Vec::new(),
})
.await
.unwrap();
idx.add_chunk(RawChunk {
id: "t:1".to_string(),
file: "t.rs".to_string(),
start_line: 1,
end_line: 1,
content: "fn target() {}".to_string(),
function_name: Some("target".to_string()),
language: Some("rust".to_string()),
chunk_type: crate::core::chunker::ChunkType::Function,
calls: Vec::new(),
inherits_from: Vec::new(),
chunk_depth: 0,
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
nlp_keywords: Vec::new(),
nlp_code_refs: Vec::new(),
virtual_terms: Vec::new(),
})
.await
.unwrap();
let q = SearchQuery {
text: "callers of target".to_string(),
top_k: 10,
expand_graph: true,
compact: false,
stage: Some(super::SearchStage::Semantic),
..Default::default()
};
let results = idx.search(&q).await.unwrap();
assert!(
!results.iter().any(|c| c.match_reason.contains("kg")),
"stage=Semantic must suppress KG expansion, got {results:#?}"
);
}
#[tokio::test]
async fn search_graph_stage_forces_kg_expansion_on_definition_query() {
let idx = CodeIndexer::new("graph-stage-force", "/tmp/test");
idx.add_chunk(RawChunk {
id: "h:1".to_string(),
file: "h.rs".to_string(),
start_line: 1,
end_line: 1,
content: "fn caller() { /* dispatch to function */ }".to_string(),
function_name: Some("caller".to_string()),
language: Some("rust".to_string()),
chunk_type: crate::core::chunker::ChunkType::Function,
calls: vec!["target".to_string()],
inherits_from: Vec::new(),
chunk_depth: 0,
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
nlp_keywords: Vec::new(),
nlp_code_refs: Vec::new(),
virtual_terms: Vec::new(),
})
.await
.unwrap();
idx.add_chunk(RawChunk {
id: "t:1".to_string(),
file: "t.rs".to_string(),
start_line: 1,
end_line: 1,
content: "fn target() {}".to_string(),
function_name: Some("target".to_string()),
language: Some("rust".to_string()),
chunk_type: crate::core::chunker::ChunkType::Function,
calls: Vec::new(),
inherits_from: Vec::new(),
chunk_depth: 0,
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
nlp_keywords: Vec::new(),
nlp_code_refs: Vec::new(),
virtual_terms: Vec::new(),
})
.await
.unwrap();
let q = SearchQuery {
text: "target".to_string(),
top_k: 10,
expand_graph: true,
compact: false,
stage: Some(super::SearchStage::Graph),
..Default::default()
};
let results = idx.search(&q).await.unwrap();
let caller = results
.iter()
.find(|c| c.id == "h:1")
.unwrap_or_else(|| panic!("caller must surface via KG, got {results:#?}"));
assert!(
caller.match_reason.contains("kg"),
"stage=Graph must force KG expansion on caller, got match_reason={}",
caller.match_reason
);
}
/// The symbol graph starts empty; after indexing a file with two functions
/// it must hold at least two nodes and give `alpha` at least one callee.
#[tokio::test]
async fn test_symbol_graph_rebuilds_after_indexing() {
    let idx = make_indexer();

    let nodes_before = idx.symbol_graph().await.node_count();
    assert_eq!(nodes_before, 0);

    idx.index_file("a.rs", "fn alpha() { beta(); }\nfn beta() {}\n")
        .await
        .unwrap();

    let graph = idx.symbol_graph().await;
    assert!(graph.node_count() >= 2, "graph should hold alpha + beta");
    let alpha_callees = graph.callees_of("alpha", 1);
    assert!(
        !alpha_callees.is_empty(),
        "alpha should have a callee edge to beta"
    );
}
/// `entity_exact_match("MyType")` must return the id of a chunk that lives
/// in the file where `MyType` was indexed.
#[tokio::test]
async fn test_entity_exact_match_finds_chunk() {
    let idx = make_indexer();
    idx.index_file("e.rs", "pub struct MyType { x: u32 }\nfn f() {}\n")
        .await
        .unwrap();

    let hit_id = idx
        .entity_exact_match("MyType")
        .await
        .expect("expected entity_exact_match to find MyType");

    // Look the returned id up in the chunk map and check which file owns it.
    let chunks = idx.chunks.read().await;
    let lives_in_e_rs = matches!(chunks.get(&hit_id), Some(c) if c.file == "e.rs");
    assert!(lives_in_e_rs, "matched chunk should live in e.rs",);
}
/// Searching for the bare identifier "FooBar" must rank the chunk from the
/// file that defines it first, and that chunk must contain "FooBar".
#[tokio::test]
async fn test_entity_exact_match_struct_ranks_first() {
    let idx = CodeIndexer::new("ent-rank-1", "/tmp/test");
    for (path, source) in [
        (
            "src/types.rs",
            "pub struct FooBar { pub x: u32 }\n\nfn unrelated() { let _ = 1; }\n",
        ),
        ("src/other.rs", "fn other_thing() {}\n"),
    ] {
        idx.index_file(path, source).await.unwrap();
    }

    let query = SearchQuery {
        text: String::from("FooBar"),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.expect("search");

    let top = results
        .first()
        .expect("search must return at least one hit");
    assert_eq!(
        top.file,
        "src/types.rs",
        "FooBar's defining file must rank first; got {:?}",
        results.iter().map(|r| &r.file).collect::<Vec<_>>(),
    );
    assert!(
        top.content.contains("FooBar"),
        "rank-1 chunk must contain the FooBar definition; got {:?}",
        top.content,
    );
}
/// A word that occurs only inside a string literal ("literal") must not be
/// returned by `entity_exact_match`.
#[tokio::test]
async fn test_entity_exact_match_skips_non_symbol_entities() {
    let idx = make_indexer();
    idx.index_file("lit.rs", "fn f() { let _ = \"this is a long literal\"; }\n")
        .await
        .unwrap();

    let hit = idx.entity_exact_match("literal").await;
    assert!(
        hit.is_none(),
        "non-symbol entity types must not satisfy entity_exact_match"
    );
}
/// A two-word query ("Arc thing") must yield no exact entity match.
#[tokio::test]
async fn test_entity_exact_match_skips_multiword_query() {
    let idx = make_indexer();
    idx.index_file("e.rs", "use std::sync::Arc;\nfn f() {}\n")
        .await
        .unwrap();

    let hit = idx.entity_exact_match("Arc thing").await;
    assert!(hit.is_none());
}
/// After indexing a function that uses `Arc`, the chunk for `f` must list
/// "Arc" among its `virtual_terms`.
#[tokio::test]
async fn test_virtual_terms_populated_from_entities() {
    let idx = make_indexer();
    let source =
        "use std::sync::Arc;\nfn f() { let _x: Arc<String> = Arc::new(String::new()); }\n";
    idx.index_file("v.rs", source).await.unwrap();

    let chunks = idx.chunks.read().await;
    let f_chunk = chunks
        .values()
        .find(|c| matches!(c.function_name.as_deref(), Some("f")))
        .expect("f chunk");

    let has_arc = f_chunk
        .virtual_terms
        .iter()
        .any(|term| term.as_str() == "Arc");
    assert!(
        has_arc,
        "expected 'Arc' in virtual_terms, got {:?}",
        f_chunk.virtual_terms
    );
}
/// `get_embedding` must return `Some` for an added chunk id and `None` for an
/// id that was never added.
#[tokio::test]
async fn test_get_embedding_returns_some_after_indexing() {
    let idx = make_indexer();
    let chunk = raw("a:1:1", "a.rs", "fn alpha() {}");
    idx.add_chunk(chunk).await.unwrap();

    assert!(
        idx.get_embedding("a:1:1").is_some(),
        "expected embedding cached after add_chunk"
    );
    let unknown = idx.get_embedding("nope");
    assert!(unknown.is_none());
}
/// `similar_by_embedding` called with `Some("a:1:1")` as the third argument
/// must not return that chunk, and every result must be tagged "vector".
#[tokio::test]
async fn test_similar_by_embedding_excludes_seed() {
    let idx = make_indexer();
    for (id, file, body) in [
        ("a:1:1", "a.rs", "fn alpha() {}"),
        ("b:1:1", "b.rs", "fn beta() {}"),
    ] {
        idx.add_chunk(raw(id, file, body)).await.unwrap();
    }

    let seed_embedding = idx.get_embedding("a:1:1").unwrap();
    let neighbours = idx
        .similar_by_embedding(&seed_embedding, 5, Some("a:1:1"))
        .await
        .unwrap();

    assert!(!neighbours.iter().any(|c| c.id == "a:1:1"));
    assert!(!neighbours.iter().any(|c| c.match_reason != "vector"));
}
/// Batch-indexes two files with two functions each, then checks the reported
/// chunk count, the symbol-graph size, and that a search finds `src/a.rs`.
#[tokio::test]
async fn test_index_files_batch_indexes_all_chunks_once() {
    let idx = make_indexer();

    let files: Vec<(String, String)> = [
        ("src/a.rs", "fn alpha() { beta(); }\nfn beta() {}\n"),
        ("src/b.rs", "fn gamma() {}\nfn delta() { gamma(); }\n"),
    ]
    .iter()
    .map(|&(path, source)| (path.to_owned(), source.to_owned()))
    .collect();

    let added = idx.index_files_batch(&files).await.unwrap();
    assert!(added >= 4, "expected at least 4 chunks, got {added}");

    let graph = idx.symbol_graph().await;
    assert!(graph.node_count() >= 4);

    let query = SearchQuery {
        text: String::from("fn alpha"),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let hits = idx.search(&query).await.unwrap();
    assert!(hits.iter().any(|c| c.file == "src/a.rs"));
}
/// Batch-indexing an empty slice must report 0 added chunks and leave the
/// indexer's chunk count at 0.
#[tokio::test]
async fn test_index_files_batch_empty_input_is_noop() {
    let idx = make_indexer();

    let added = idx.index_files_batch(&[]).await.unwrap();

    assert_eq!(added, 0);
    let total = idx.chunk_count();
    assert_eq!(total, 0);
}
/// Batch indexing on an indexer built without `with_components` must still
/// add chunks and make them findable by search.
#[tokio::test]
async fn test_index_files_batch_bm25_only_mode() {
    let idx = CodeIndexer::new("bm25-batch", "/tmp/test");

    let files = vec![(
        String::from("x.rs"),
        String::from("fn authenticate() {}\nfn other() {}\n"),
    )];
    let added = idx.index_files_batch(&files).await.unwrap();
    assert!(added >= 2);

    let query = SearchQuery {
        text: String::from("authenticate"),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let hits = idx.search(&query).await.unwrap();
    assert!(hits.iter().any(|c| c.content.contains("authenticate")));
}
/// An indexer configured with the domain term "rezo" must return the
/// `rezo_handler` chunk for a query the plain classifier labels `Unknown`.
#[tokio::test]
async fn search_uses_domain_terms_when_provided() {
    use crate::core::classifier::{QueryClassifier, QueryIntent};

    let query_text = "rezo integration query";

    // Baseline: without domain terms the phrase has no recognised intent.
    assert_eq!(
        QueryClassifier::classify(query_text),
        QueryIntent::Unknown,
        "baseline: plain classifier must treat the rezo phrase as Unknown"
    );

    let domain_terms = vec!["rezo".to_string()];
    let idx = CodeIndexer::new("domain-test", "/tmp/domain").with_domain_terms(domain_terms);
    idx.index_file("api.rs", "fn rezo_handler() {}\nfn other() {}\n")
        .await
        .expect("index_file ok");

    let query = SearchQuery {
        text: query_text.into(),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let hits = idx.search(&query).await.expect("search ok");
    assert!(
        hits.iter().any(|c| c.content.contains("rezo_handler")),
        "expected rezo_handler chunk to appear in results: {:?}",
        hits.iter().map(|c| &c.content).collect::<Vec<_>>()
    );
}
/// `file_type_score_multiplier` must return 1.0 for the source paths below
/// and 0.5 for the markdown / config / data paths.
#[test]
fn test_file_type_multiplier_demotes_docs() {
    // Source files keep their full score.
    for source_path in ["src/auth.rs", "src/auth.py", "src/auth.go"] {
        assert_eq!(file_type_score_multiplier(source_path), 1.0);
    }

    // Docs, config and data files are halved; the upper-case `.MD` entry
    // checks that the extension case does not matter.
    for other_path in [
        "CHANGELOG.md",
        "docs/CLAUDE.md",
        "Cargo.toml",
        "config.yaml",
        "data.json",
        "README.MD",
    ] {
        assert_eq!(file_type_score_multiplier(other_path), 0.5);
    }
}
/// For the query "struct CodeChunk fields", the top result must come from a
/// `.rs` file even though a `CHANGELOG.md` chunk repeats the query terms.
#[tokio::test]
async fn test_definition_demotes_markdown_below_source() {
    let idx = make_indexer();
    for (id, file, body) in [
        (
            "doc:1",
            "CHANGELOG.md",
            "## CodeChunk struct\nCodeChunk struct fields: id, file. CodeChunk struct fields are stable.",
        ),
        (
            "src:1",
            "src/indexer.rs",
            "pub struct CodeChunk { pub id: String, pub file: String }",
        ),
    ] {
        idx.add_chunk(raw(id, file, body)).await.unwrap();
    }

    let query = SearchQuery {
        text: String::from("struct CodeChunk fields"),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();

    let top = results.first().expect("search must return results");
    assert!(
        top.file.ends_with(".rs"),
        "Definition intent must rank source over docs, top result file = {}",
        top.file
    );
}
/// For the query "HNSW lookup" (classified as `Definition`), the struct chunk
/// in `src/hnsw_store.rs` must land in the top three despite three comment
/// chunks that repeat the query terms many times.
#[tokio::test]
async fn test_struct_definition_boost_surfaces_struct_over_usage() {
    use crate::core::chunker::ChunkType;
    use crate::core::classifier::{QueryClassifier, QueryIntent};

    assert_eq!(
        QueryClassifier::classify("HNSW lookup"),
        QueryIntent::Definition,
        "test pre-condition: short ALL-CAPS acronym query must classify as \
         Definition (#119 + #197 short-query carve-out)"
    );

    // One definition chunk followed by three term-heavy usage chunks.
    let corpus = vec![
        raw_with_kind(
            "def:1",
            "src/hnsw_store.rs",
            "pub struct HnswStore { index: Index, dim: usize }",
            ChunkType::Struct,
            Some("HnswStore"),
        ),
        raw(
            "use:1",
            "src/retrieval.rs",
            "// HNSW lookup path.\n\
             // Uses HNSW to retrieve top-k vectors.\n\
             // HNSW lookup HNSW lookup HNSW HNSW HNSW HNSW HNSW HNSW HNSW HNSW",
        ),
        raw(
            "use:2",
            "src/mmr.rs",
            "// MMR diversity reranker over HNSW lookup results.\n\
             // HNSW HNSW HNSW lookup lookup lookup HNSW HNSW HNSW",
        ),
        raw(
            "use:3",
            "src/search.rs",
            "// Top-level hybrid search: BM25 lane + HNSW lookup lane.\n\
             // HNSW HNSW HNSW lookup lookup HNSW HNSW lookup HNSW",
        ),
    ];
    let idx = make_indexer();
    for chunk in corpus {
        idx.add_chunk(chunk).await.unwrap();
    }

    let query = SearchQuery {
        text: String::from("HNSW lookup"),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();
    assert!(!results.is_empty(), "search must return results");

    let top3_files: Vec<&str> = results[..results.len().min(3)]
        .iter()
        .map(|c| c.file.as_str())
        .collect();
    assert!(
        top3_files.contains(&"src/hnsw_store.rs"),
        "issue #117 acceptance: hnsw_store.rs must rank in top-3 for \
         the canonical acronym query; got top-3 files = {top3_files:?}, \
         full ranking = {:?}",
        results
            .iter()
            .map(|c| (c.file.as_str(), c.score))
            .collect::<Vec<_>>()
    );
}
/// For the query "get_call_chain" (classified as `Definition`), the function
/// declaration chunk must rank first or second, ahead of chunks that only
/// mention the name in a string constant or a call site.
#[tokio::test]
async fn test_function_definition_boost_surfaces_function_over_string_literal_usage() {
    use crate::core::chunker::ChunkType;
    use crate::core::classifier::{QueryClassifier, QueryIntent};

    assert_eq!(
        QueryClassifier::classify("get_call_chain"),
        QueryIntent::Definition,
        "test pre-condition: snake_case symbol must classify as Definition"
    );

    // The declaration, a constant that embeds the name, and a call site.
    let corpus = vec![
        raw_with_kind(
            "def:fn",
            "src/call_chain.rs",
            "pub fn get_call_chain(symbol: &str) -> Vec<String> {\n \
             vec![symbol.to_string()]\n}",
            ChunkType::Function,
            Some("get_call_chain"),
        ),
        raw_with_kind(
            "use:descriptor",
            "src/mcp_descriptor.rs",
            "const TOOL: &str = r#\"{ \"name\": \"get_call_chain\", \
             \"description\": \"get_call_chain helper get_call_chain tool \
             get_call_chain get_call_chain get_call_chain\" }\"#;",
            ChunkType::Constant,
            Some("TOOL"),
        ),
        raw(
            "use:call",
            "src/caller.rs",
            "let chain = get_call_chain(\"foo\"); \
             // get_call_chain returns the call chain; get_call_chain is a helper.",
        ),
    ];
    let idx = make_indexer();
    for chunk in corpus {
        idx.add_chunk(chunk).await.unwrap();
    }

    let query = SearchQuery {
        text: String::from("get_call_chain"),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();
    assert!(!results.is_empty(), "search must return results");

    // Zero-based position of the declaration chunk in the ranking.
    let rank_of_fn = results
        .iter()
        .position(|c| c.file == "src/call_chain.rs")
        .expect("Function declaration must be in results");
    assert!(
        rank_of_fn < 2,
        "issue #122 acceptance: Function declaration must rank at top-2 or \
         better; got rank {rank_of_fn}, ranking = {:?}",
        results
            .iter()
            .map(|c| (c.file.as_str(), c.score))
            .collect::<Vec<_>>()
    );
}
/// For the query "parse_token", the `ChunkType::Method` chunk that declares
/// it must rank ahead of a comment chunk that repeats the name.
#[tokio::test]
async fn test_method_definition_boost_fires() {
    use crate::core::chunker::ChunkType;

    let declaration = raw_with_kind(
        "def:method",
        "src/parser.rs",
        "impl Parser {\n \
         pub fn parse_token(&self, input: &str) -> Token { Token::default() }\n}",
        ChunkType::Method,
        Some("parse_token"),
    );
    let usage = raw(
        "use:method",
        "src/driver.rs",
        "// driver calls parse_token; parse_token returns a Token. parse_token \
         parse_token parse_token parse_token.",
    );

    let idx = make_indexer();
    idx.add_chunk(declaration).await.unwrap();
    idx.add_chunk(usage).await.unwrap();

    let query = SearchQuery {
        text: String::from("parse_token"),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();

    // Zero-based position of the first hit from the given file, if any.
    let rank_in = |file: &str| results.iter().position(|c| c.file == file);
    let rank_of_method = rank_in("src/parser.rs").expect("Method declaration must be in results");
    let rank_of_usage = rank_in("src/driver.rs").expect("Usage chunk must be in results");

    assert!(
        rank_of_method < rank_of_usage,
        "issue #122: Method declaration (rank {rank_of_method}) must \
         out-rank the usage chunk (rank {rank_of_usage}); ranking = {:?}",
        results
            .iter()
            .map(|c| (c.file.as_str(), c.score))
            .collect::<Vec<_>>()
    );
}
/// For a "how does X work" query (classified as `Conceptual`), the markdown
/// chunk must still appear in the results alongside the function chunk.
#[tokio::test]
async fn test_function_boost_skipped_on_conceptual_intent() {
    use crate::core::chunker::ChunkType;
    use crate::core::classifier::{QueryClassifier, QueryIntent};

    let query_text = "how does parse_token work in the parser";
    assert_eq!(
        QueryClassifier::classify(query_text),
        QueryIntent::Conceptual,
        "test pre-condition: 'how does X work' must classify as Conceptual"
    );

    let declaration = raw_with_kind(
        "def:fn",
        "src/parser.rs",
        "pub fn parse_token(input: &str) -> Token { Token::default() }",
        ChunkType::Function,
        Some("parse_token"),
    );
    let doc = raw(
        "doc:1",
        "docs/ARCHITECTURE.md",
        "How does parse_token work? parse_token in the parser tokenises input \
         strings into Token values. parse_token parse_token parser parser \
         tokenise tokenise.",
    );

    let idx = make_indexer();
    idx.add_chunk(declaration).await.unwrap();
    idx.add_chunk(doc).await.unwrap();

    let query = SearchQuery {
        text: query_text.to_owned(),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();

    let doc_surfaced = results.iter().any(|c| c.file.ends_with(".md"));
    assert!(
        doc_surfaced,
        "Conceptual intent must not apply the function-definition boost — \
         the doc chunk should still surface; ranking = {:?}",
        results
            .iter()
            .map(|c| (c.file.as_str(), c.score))
            .collect::<Vec<_>>()
    );
}
/// Searching must not panic and must return results when `Function` chunks
/// have no name (`None`) or an empty name (`Some("")`).
#[tokio::test]
async fn test_function_boost_no_op_when_function_name_missing() {
    use crate::core::chunker::ChunkType;

    let corpus = vec![
        // Function chunk with no name at all.
        raw_with_kind(
            "def:noname",
            "src/anon.rs",
            "// anonymous body referencing get_call_chain\n\
             get_call_chain(\"x\");",
            ChunkType::Function,
            None,
        ),
        // Function chunk whose name is the empty string.
        raw_with_kind(
            "def:empty",
            "src/empty.rs",
            "// another anon block: get_call_chain helper",
            ChunkType::Function,
            Some(""),
        ),
        raw("use:1", "src/use.rs", "let r = get_call_chain(\"foo\");"),
    ];
    let idx = make_indexer();
    for chunk in corpus {
        idx.add_chunk(chunk).await.unwrap();
    }

    let query = SearchQuery {
        text: String::from("get_call_chain"),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();
    assert!(
        !results.is_empty(),
        "search must return results — no panic in the boost path"
    );
}
/// For a "how does ... work" query in the default search mode, at least one
/// `.md` chunk must be present in the results.
#[tokio::test]
async fn test_conceptual_does_not_demote_docs() {
    let idx = make_indexer();
    for (id, file, body) in [
        (
            "doc:1",
            "ARCHITECTURE.md",
            "How does the CodeChunk pipeline work in trusty-search.",
        ),
        (
            "src:1",
            "src/indexer.rs",
            "pub struct CodeChunk { pub id: String }",
        ),
    ] {
        idx.add_chunk(raw(id, file, body)).await.unwrap();
    }

    let query = SearchQuery {
        text: String::from("how does the CodeChunk pipeline work"),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();

    let doc_surfaced = results.iter().any(|c| c.file.ends_with(".md"));
    assert!(
        doc_surfaced,
        "Conceptual queries in default mode must still surface .md docs \
         (intent-aware effective-mode override, issue #73)"
    );
}
/// In explicit `SearchMode::Code` with `top_k: 1`, the single returned result
/// must be the `.rs` chunk rather than the term-heavy `CHANGELOG.md` chunk.
#[tokio::test]
async fn test_code_mode_source_outranks_changelog_pre_truncation() {
    use crate::core::classifier::{QueryClassifier, QueryIntent};

    let query_text = "error handling retry logic deprecated path";
    assert_eq!(
        QueryClassifier::classify(query_text),
        QueryIntent::BugDebt,
        "test pre-condition: query should classify as BugDebt so explicit Code mode survives"
    );

    let idx = make_indexer();
    for (id, file, body) in [
        (
            "doc:1",
            "CHANGELOG.md",
            "error handling error handling error handling retry logic retry logic \
             deprecated path deprecated path error handling retry logic deprecated \
             error handling retry logic deprecated path error handling retry logic",
        ),
        (
            "src:1",
            "src/retry.rs",
            "fn handle_error_with_retry() { /* error handling + retry logic, deprecated path */ }",
        ),
    ] {
        idx.add_chunk(raw(id, file, body)).await.unwrap();
    }

    // Only one slot is available, so the ordering before truncation decides.
    let query = SearchQuery {
        text: query_text.to_owned(),
        top_k: 1,
        expand_graph: false,
        compact: false,
        mode: crate::core::indexer::SearchMode::Code,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();

    assert_eq!(
        results.len(),
        1,
        "with top_k=1 the source chunk must survive into the single slot \
         (pre-truncation penalty, issue #72) — got {:?}",
        results.iter().map(|c| &c.file).collect::<Vec<_>>()
    );
    let only = &results[0];
    assert!(
        only.file.ends_with(".rs"),
        "code-mode query must return the source file, not be crowded out by \
         high-TF prose (issue #72); got {}",
        only.file
    );
}
/// With a corpus holding only one markdown chunk, a `Definition`-classified
/// query in the default mode must still return that chunk.
#[tokio::test]
async fn test_definition_default_mode_returns_docs_when_no_source_matches() {
    use crate::core::classifier::{QueryClassifier, QueryIntent};

    let query_text = "UserPromptSubmit hook registration";
    assert_eq!(
        QueryClassifier::classify(query_text),
        QueryIntent::Definition,
        "test pre-condition: PascalCase identifier should classify as Definition"
    );

    // The corpus contains documentation only — no source chunks.
    let doc = raw(
        "doc:1",
        "docs/HOOKS.md",
        "# UserPromptSubmit hook registration\n\
         The UserPromptSubmit hook fires whenever the user submits a prompt. \
         Register your hook handler via the registration API to receive these events.",
    );
    let idx = make_indexer();
    idx.add_chunk(doc).await.unwrap();

    let query = SearchQuery {
        text: query_text.to_owned(),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();

    assert!(
        !results.is_empty(),
        "Definition-intent query against docs-only corpus returned 0 results — \
         the intent-aware mode override is broken (issue #79)"
    );
    let doc_surfaced = results.iter().any(|c| c.file.ends_with(".md"));
    assert!(
        doc_surfaced,
        "expected the .md chunk to survive the post-filter, got: {:?}",
        results.iter().map(|c| &c.file).collect::<Vec<_>>()
    );
}
/// With a corpus holding only one markdown chunk, a `Conceptual`-classified
/// query in the default mode must return at least one result.
#[tokio::test]
async fn test_conceptual_default_mode_returns_docs_when_no_source_matches() {
    use crate::core::classifier::{QueryClassifier, QueryIntent};

    let query_text = "how does the hook system work";
    assert_eq!(
        QueryClassifier::classify(query_text),
        QueryIntent::Conceptual,
        "test pre-condition: 'how does' should classify as Conceptual"
    );

    // The corpus contains documentation only — no source chunks.
    let doc = raw(
        "doc:1",
        "docs/ARCHITECTURE.md",
        "## How the hook system works\n\
         The hook system dispatches events to registered handlers in priority order.",
    );
    let idx = make_indexer();
    idx.add_chunk(doc).await.unwrap();

    let query = SearchQuery {
        text: query_text.to_owned(),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = idx.search(&query).await.unwrap();
    assert!(
        !results.is_empty(),
        "Conceptual-intent query against docs-only corpus returned 0 results — \
         the intent-aware mode override is broken (issue #79)"
    );
}
#[tokio::test]
async fn test_kg_results_survive_top_k_truncation() {
let idx = CodeIndexer::new("kg-trunc", "/tmp/test");
idx.add_chunk(RawChunk {
id: "src:caller".to_string(),
file: "caller.rs".to_string(),
start_line: 1,
end_line: 3,
content: "fn caller() { /* dispatches */ }".to_string(),
function_name: Some("caller".to_string()),
language: Some("rust".to_string()),
chunk_type: crate::core::chunker::ChunkType::Function,
calls: vec!["authenticate".to_string()],
inherits_from: Vec::new(),
chunk_depth: 0,
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
nlp_keywords: Vec::new(),
nlp_code_refs: Vec::new(),
virtual_terms: Vec::new(),
})
.await
.unwrap();
idx.add_chunk(RawChunk {
id: "src:authenticate".to_string(),
file: "auth.rs".to_string(),
start_line: 1,
end_line: 1,
content: "fn authenticate() {}".to_string(),
function_name: Some("authenticate".to_string()),
language: Some("rust".to_string()),
chunk_type: crate::core::chunker::ChunkType::Function,
calls: Vec::new(),
inherits_from: Vec::new(),
chunk_depth: 0,
parent_chunk_id: None,
child_chunk_ids: Vec::new(),
nlp_keywords: Vec::new(),
nlp_code_refs: Vec::new(),
virtual_terms: Vec::new(),
})
.await
.unwrap();
let q = SearchQuery {
text: "callers of authenticate".to_string(),
top_k: 10,
expand_graph: true,
compact: false,
..Default::default()
};
let results = idx.search(&q).await.unwrap();
assert!(
results.iter().any(|c| c.match_reason == "hybrid+kg"),
"at least one result must carry 'hybrid+kg' match_reason, got: {:#?}",
results
.iter()
.map(|c| (&c.id, &c.match_reason))
.collect::<Vec<_>>()
);
}
/// Pins the `(weight, weight, kg-flag)` triple returned by
/// `QueryIntent::weights()` for the Definition and Usage intents.
///
/// Each component is asserted on its own so a failure names the exact value
/// that regressed instead of reporting one opaque compound condition.
#[test]
fn test_intent_routing_definitions() {
    use crate::core::classifier::QueryIntent;

    let (a, b, kg) = QueryIntent::Definition.weights();
    assert!(
        (a - 0.3).abs() < 1e-6,
        "Definition: first weight must be 0.3, got {a}"
    );
    assert!(
        (b - 0.7).abs() < 1e-6,
        "Definition: second weight must be 0.7, got {b}"
    );
    assert!(!kg, "Definition: kg flag must be false");

    let (a, b, kg) = QueryIntent::Usage.weights();
    assert!(
        (a - 0.5).abs() < 1e-6,
        "Usage: first weight must be 0.5, got {a}"
    );
    assert!(
        (b - 0.5).abs() < 1e-6,
        "Usage: second weight must be 0.5, got {b}"
    );
    assert!(kg, "Usage: kg flag must be true");
}
#[tokio::test]
async fn test_enumerate_chunks_paginates_stable_order() {
let idx = make_indexer();
fn raw_lines(id: &str, file: &str, start: usize, end: usize, content: &str) -> RawChunk {
let mut r = raw(id, file, content);
r.start_line = start;
r.end_line = end;
r
}
idx.add_chunk(raw_lines("b.rs:10:20", "b.rs", 10, 20, "fn b_two() {}"))
.await
.unwrap();
idx.add_chunk(raw_lines("a.rs:1:5", "a.rs", 1, 5, "fn a_one() {}"))
.await
.unwrap();
idx.add_chunk(raw_lines("b.rs:1:5", "b.rs", 1, 5, "fn b_one() {}"))
.await
.unwrap();
idx.add_chunk(raw_lines("a.rs:30:40", "a.rs", 30, 40, "fn a_two() {}"))
.await
.unwrap();
let (total_all, all) = idx.enumerate_chunks(0, 100).await;
assert_eq!(total_all, 4);
let ids: Vec<_> = all.iter().map(|c| c.id.as_str()).collect();
assert_eq!(
ids,
vec!["a.rs:1:5", "a.rs:30:40", "b.rs:1:5", "b.rs:10:20"]
);
let (total_p1, page1) = idx.enumerate_chunks(0, 2).await;
let (total_p2, page2) = idx.enumerate_chunks(2, 2).await;
assert_eq!(total_p1, 4);
assert_eq!(total_p2, 4);
assert_eq!(page1.len(), 2);
assert_eq!(page2.len(), 2);
let combined: Vec<_> = page1
.iter()
.chain(page2.iter())
.map(|c| c.id.as_str())
.collect();
assert_eq!(combined, ids);
let (total_end, end) = idx.enumerate_chunks(10, 5).await;
assert_eq!(total_end, 4);
assert!(end.is_empty());
let (total_z, z) = idx.enumerate_chunks(0, 0).await;
assert_eq!(total_z, 4);
assert!(z.is_empty());
}
/// Builds a code-mode `SearchQuery` carrying an explicit branch file list and
/// boost factor. Every field is spelled out; graph expansion, compaction and
/// archive exclusion are all off, and `top_k` is 10.
fn make_branch_query(text: &str, files: Vec<String>, boost: f32) -> SearchQuery {
    let text = String::from(text);
    let branch_files = Some(files);
    SearchQuery {
        text,
        branch_files,
        branch_boost: boost,
        branch: None,
        stage: None,
        mode: SearchMode::Code,
        top_k: 10,
        expand_graph: false,
        compact: false,
        exclude_archived: false,
    }
}
/// Two chunks with identical content differ only in whether their file is in
/// the branch file list; the on-branch one must be flagged, score at least as
/// high as the other, and rank first.
#[tokio::test]
async fn test_branch_boost_applied_to_matching_chunks() {
    let indexer = make_indexer();
    for (id, file) in [
        ("src/on.rs:1:1", "src/on.rs"),
        ("src/off.rs:1:1", "src/off.rs"),
    ] {
        indexer
            .add_chunk(raw(id, file, "fn authenticate(user: &str) -> bool { true }"))
            .await
            .unwrap();
    }

    let query = make_branch_query("fn authenticate", vec!["src/on.rs".to_string()], 1.5);
    let results = indexer.search(&query).await.unwrap();
    assert!(!results.is_empty(), "branch-aware search must return hits");

    let on_branch = results
        .iter()
        .find(|c| c.file == "src/on.rs")
        .expect("on-branch chunk in results");
    assert!(on_branch.on_branch, "on_branch must be true for on.rs");

    // The off-branch chunk is allowed to be absent; when present it must be
    // unflagged and must not outscore the boosted chunk.
    if let Some(off) = results.iter().find(|c| c.file == "src/off.rs") {
        assert!(!off.on_branch, "on_branch must be false for off.rs");
        assert!(
            on_branch.score >= off.score,
            "branch boost must make on.rs >= off.rs (got {} vs {})",
            on_branch.score,
            off.score
        );
    }

    let top = &results[0];
    assert_eq!(
        top.file,
        "src/on.rs",
        "on-branch chunk must rank first; got {:?}",
        results.iter().map(|c| &c.file).collect::<Vec<_>>()
    );
}
/// `resolve_branch_set` must clamp the requested boost into `[1.0, 3.0]`, and
/// must return no branch set at all when the clamped boost is 1.0.
#[tokio::test]
async fn test_branch_boost_clamped_to_3x() {
    let on_files = || vec!["src/on.rs".to_string()];
    let root = std::path::PathBuf::from("/tmp/test");

    // Upper bound: 10.0 comes back as 3.0 and the set is kept.
    let q = make_branch_query("foo", on_files(), 10.0);
    let (set, boost) = super::search::resolve_branch_set(&q, &root);
    assert!(set.is_some(), "branch set must be present");
    assert!(
        (boost - 3.0).abs() < f32::EPSILON,
        "branch_boost=10.0 must clamp to 3.0, got {boost}"
    );

    // Lower bound: 0.0 comes back as 1.0, which makes the set pointless.
    let q_low = make_branch_query("foo", on_files(), 0.0);
    let (set_low, boost_low) = super::search::resolve_branch_set(&q_low, &root);
    assert!(
        (boost_low - 1.0).abs() < f32::EPSILON,
        "branch_boost=0.0 must clamp to 1.0, got {boost_low}"
    );
    assert!(
        set_low.is_none(),
        "branch_boost=1.0 must drop the set (no-op)"
    );
}
/// The `on_branch` flag must be true exactly for chunks whose file appears in
/// the query's branch file list, including when the list entry carries a
/// leading `./`.
#[tokio::test]
async fn test_on_branch_set_correctly() {
    let idx = make_indexer();
    idx.add_chunk(raw(
        "src/on.rs:1:1",
        "src/on.rs",
        "fn authenticate() -> bool { true }",
    ))
    .await
    .unwrap();
    idx.add_chunk(raw(
        "src/off.rs:1:1",
        "src/off.rs",
        "fn authenticate() -> bool { true }",
    ))
    .await
    .unwrap();

    let q = make_branch_query("fn authenticate", vec!["src/on.rs".to_string()], 1.5);
    let results = idx.search(&q).await.unwrap();
    // Without this guard the loop below would pass vacuously on an empty
    // result set and the flag would never actually be checked.
    assert!(
        results.iter().any(|c| c.file == "src/on.rs"),
        "on-branch chunk missing from results; flag checks would be vacuous"
    );
    for c in &results {
        if c.file == "src/on.rs" {
            assert!(c.on_branch, "on.rs must be flagged on_branch=true");
        } else if c.file == "src/off.rs" {
            assert!(!c.on_branch, "off.rs must be flagged on_branch=false");
        }
    }

    // Same query, but the branch file list spells the path with a leading "./".
    let q2 = make_branch_query("fn authenticate", vec!["./src/on.rs".to_string()], 1.5);
    let results2 = idx.search(&q2).await.unwrap();
    let on2 = results2
        .iter()
        .find(|c| c.file == "src/on.rs")
        .expect("on-branch chunk in results");
    assert!(on2.on_branch, "leading './' must be normalized away");
}
/// When a query carries neither `branch_files` nor `branch`, no result may be
/// flagged `on_branch`.
#[tokio::test]
async fn test_no_boost_when_branch_files_absent() {
    let indexer = make_indexer();
    let seeds = [
        (
            "src/auth.rs:1:5",
            "src/auth.rs",
            "fn authenticate(user: &str, password: &str) -> bool { true }",
        ),
        (
            "src/render.rs:1:3",
            "src/render.rs",
            "fn render_ui_components() { /* svelte */ }",
        ),
    ];
    for (id, file, content) in seeds {
        indexer.add_chunk(raw(id, file, content)).await.unwrap();
    }

    // No branch context whatsoever; the boost is left at its default value.
    let query = SearchQuery {
        text: "fn authenticate".to_string(),
        top_k: 5,
        expand_graph: false,
        compact: false,
        branch_files: None,
        branch_boost: SearchQuery::default_branch_boost(),
        branch: None,
        mode: SearchMode::Code,
        exclude_archived: false,
        stage: None,
    };
    let results = indexer.search(&query).await.unwrap();
    assert!(!results.is_empty());
    assert!(
        results.iter().all(|c| !c.on_branch),
        "on_branch must default to false when no branch context provided"
    );
}
use crate::core::corpus::CorpusStore;
/// Creates a `CodeIndexer` named "corpus-test" and attaches a `CorpusStore`
/// opened at `redb_path`. Panics if the store cannot be opened.
fn make_indexer_with_corpus(redb_path: &std::path::Path) -> CodeIndexer {
    let mut indexer = CodeIndexer::new("corpus-test", "/tmp/corpus-test");
    let corpus = Arc::new(CorpusStore::open(redb_path).expect("open corpus store"));
    indexer.set_corpus_store(corpus);
    indexer
}
/// A batch indexed through one indexer must be readable straight from the
/// redb file and must rehydrate (including BM25) into a second indexer opened
/// on the same path.
#[tokio::test]
async fn test_corpus_store_roundtrip() {
    let dir = tempfile::tempdir().unwrap();
    let redb_path = dir.path().join("index.redb");

    // Phase 1: index a batch, then release the indexer and its store handle.
    let writer = make_indexer_with_corpus(&redb_path);
    writer
        .index_files_batch(&[
            ("src/auth.rs".into(), "fn authenticate() {}".into()),
            ("src/token.rs".into(), "fn verify_token() {}".into()),
        ])
        .await
        .expect("index batch");
    assert!(writer.chunk_count() >= 2);
    drop(writer);

    // Phase 2: inspect the store file directly, then release it again.
    let on_disk = CorpusStore::open(&redb_path).unwrap();
    assert!(
        on_disk.chunk_count().unwrap() >= 2,
        "committed batch was not persisted to redb"
    );
    drop(on_disk);

    // Phase 3: warm-boot a fresh indexer from the same file.
    let restored = make_indexer_with_corpus(&redb_path);
    let n = restored
        .load_chunks_from_redb()
        .await
        .expect("warm-boot from redb");
    assert!(n >= 2, "warm-boot rehydrated {n} chunks, expected >= 2");
    assert_eq!(restored.chunk_count(), n);

    // The lexical index must have been rebuilt too; the read guard is a
    // temporary confined to this statement.
    let hits = restored.bm25.read().await.score_query_all("authenticate", 5);
    assert!(
        !hits.is_empty(),
        "BM25 not rebuilt from redb-restored chunks"
    );
}
/// Warm-booting must yield zero chunks both for a brand-new redb file and for
/// an indexer that has no corpus store attached at all.
#[tokio::test]
async fn test_corpus_store_warm_boot_empty_is_zero() {
    let dir = tempfile::tempdir().unwrap();
    let fresh_path = dir.path().join("fresh.redb");
    let fresh = make_indexer_with_corpus(&fresh_path);
    assert_eq!(
        fresh.load_chunks_from_redb().await.unwrap(),
        0,
        "empty redb corpus must rehydrate zero chunks"
    );

    // No corpus store configured: still Ok(0) rather than an error.
    let bare = CodeIndexer::new("bare", "/tmp/bare");
    let rehydrated = bare.load_chunks_from_redb().await.unwrap();
    assert_eq!(rehydrated, 0);
}
/// `remove_file` must delete the file's chunks from the redb corpus as well,
/// so that a later warm-boot does not bring them back.
#[tokio::test]
async fn test_corpus_store_deletes_on_remove() {
    let dir = tempfile::tempdir().unwrap();
    let redb_path = dir.path().join("index.redb");

    let indexer = make_indexer_with_corpus(&redb_path);
    indexer
        .index_files_batch(&[
            ("src/keep.rs".into(), "fn keep_me() {}".into()),
            ("src/drop.rs".into(), "fn drop_me() {}".into()),
        ])
        .await
        .unwrap();
    assert!(indexer.chunk_count() >= 2);
    indexer.remove_file("src/drop.rs").await.unwrap();
    drop(indexer);

    // Read the persisted chunks directly; the store handle is released before
    // the next indexer opens the same file.
    let store = CorpusStore::open(&redb_path).unwrap();
    let chunks = store.load_all_chunks().unwrap();
    drop(store);

    let dropped_survives = chunks.iter().any(|c| c.file == "src/drop.rs");
    assert!(
        !dropped_survives,
        "removed file's chunks still present in redb after remove_file"
    );
    let kept_survives = chunks.iter().any(|c| c.file == "src/keep.rs");
    assert!(
        kept_survives,
        "remove_file evicted the wrong file's chunks from redb"
    );

    // A warm-booted indexer must not be able to find the removed file.
    let restored = make_indexer_with_corpus(&redb_path);
    restored.load_chunks_from_redb().await.unwrap();
    let found = restored.find_chunk_id("drop.rs", None).await;
    assert!(found.is_none(), "deleted chunk resurrected on warm-boot");
}
/// Chunks loaded from a legacy `chunks.json` must be written into the redb
/// corpus by `migrate_corpus_to_redb`, so a later warm-boot finds them there.
#[tokio::test]
async fn test_corpus_store_migrates_from_json() {
    let dir = tempfile::tempdir().unwrap();
    let json_path = dir.path().join("chunks.json");
    let redb_path = dir.path().join("index.redb");

    // Produce the legacy JSON snapshot with an indexer that has no corpus store.
    let legacy = make_indexer();
    for (id, file, content) in [
        ("a", "src/a.rs", "fn legacy_a() {}"),
        ("b", "src/b.rs", "fn legacy_b() {}"),
    ] {
        legacy.add_chunk(raw(id, file, content)).await.unwrap();
    }
    legacy.save_chunks_to_disk(&json_path).await.unwrap();
    drop(legacy);
    assert!(json_path.exists());

    // Load the JSON into a corpus-backed indexer and migrate it.
    let migrating = make_indexer_with_corpus(&redb_path);
    let n = migrating.load_chunks_from_disk(&json_path).await.unwrap();
    assert_eq!(n, 2);
    migrating.migrate_corpus_to_redb().await;
    drop(migrating);

    // A fresh indexer on the same redb file must see both chunks.
    let restored = make_indexer_with_corpus(&redb_path);
    let m = restored.load_chunks_from_redb().await.unwrap();
    assert_eq!(m, 2, "redb corpus was not seeded by the JSON migration");
}
// Exercises corpus-store hot-swapping: a staging store is swapped in for the
// live one, a batch is indexed, the staging store is taken back out, and the
// live store is then checked to be untouched.
#[tokio::test]
async fn test_corpus_store_swap_and_take() {
let dir = tempfile::tempdir().unwrap();
let live_path = dir.path().join("index.redb");
let tmp_path = dir.path().join("index.redb.tmp");
let mut idx = make_indexer_with_corpus(&live_path);
assert!(idx.has_corpus_store());
// Swap in a freshly opened staging store; the call hands back the store that
// was attached before, which must be the live one.
let staged = Arc::new(CorpusStore::open_fresh(&tmp_path).unwrap());
let prev = idx.swap_corpus_store(staged).expect("prior store returned");
assert_eq!(prev.path(), live_path.as_path());
// Release the live handle now; the live file is reopened at the end of the test.
// NOTE(review): presumably the store holds an exclusive file lock — confirm.
drop(prev);
// Index while the staging store is attached.
idx.index_files_batch(&[("src/new.rs".into(), "fn brand_new() {}".into())])
.await
.unwrap();
// Detach the staging store: the indexer must report no store afterwards and
// the batch must have been committed to the staging file.
let staged_back = idx.take_corpus_store().expect("staging store taken");
assert_eq!(staged_back.path(), tmp_path.as_path());
assert!(!idx.has_corpus_store());
assert!(
staged_back.chunk_count().unwrap() >= 1,
"batch did not commit to the staged corpus"
);
drop(staged_back);
// The live corpus must still be empty: nothing was written to it while the
// staging store was attached.
let live = CorpusStore::open(&live_path).unwrap();
assert_eq!(
live.chunk_count().unwrap(),
0,
"live corpus was mutated while a staging corpus was swapped in"
);
}
/// Pins the label `compute_match_reason` returns for each tested flag
/// combination, including the all-false fallback label.
#[test]
fn test_compute_match_reason_fallback_label() {
    // ((first, second, third) flag, expected label)
    let cases = [
        ((false, false, false), "fallback:ripgrep"),
        ((true, false, false), "vector"),
        ((false, true, false), "bm25"),
        ((true, true, false), "hybrid"),
        ((false, false, true), "hybrid+kg"),
    ];
    for ((first, second, third), expected) in cases {
        assert_eq!(compute_match_reason(first, second, third), expected);
    }
}
/// The grep fallback must return exactly the chunk containing the queried
/// substring, with a score below 0.01.
#[tokio::test]
async fn test_grep_fallback_returns_substring_hits() {
    let indexer = make_indexer();
    for (id, file, content) in [
        ("a", "src/a.rs", "fn alpha_qwerty_unique() {}"),
        ("b", "src/b.rs", "fn beta() {}"),
    ] {
        indexer.add_chunk(raw(id, file, content)).await.unwrap();
    }

    let hits = indexer.grep_fallback_search("alpha_qwerty_unique", 5).await;
    assert_eq!(hits.len(), 1);
    let (top_id, top_score) = &hits[0];
    assert_eq!(top_id.as_str(), "a");
    assert!(*top_score < 0.01, "fallback score should be sub-0.01");
}
/// The grep fallback must match the query text literally: `a.b.c` matches the
/// chunk containing that exact text, but not `aXbYc`, which would only match
/// if the dots were treated as wildcards.
#[tokio::test]
async fn test_grep_fallback_treats_query_as_literal() {
    let indexer = make_indexer();
    let literal_chunk = raw("a", "src/a.rs", "fn foo() {} // literal: a.b.c");
    let decoy_chunk = raw("b", "src/b.rs", "fn aXbYc() {}");
    indexer.add_chunk(literal_chunk).await.unwrap();
    indexer.add_chunk(decoy_chunk).await.unwrap();

    let hits = indexer.grep_fallback_search("a.b.c", 5).await;
    let ids: Vec<&str> = hits.iter().map(|(id, _)| id.as_str()).collect();

    let found_literal = ids.contains(&"a");
    assert!(found_literal, "literal match in a missing: {ids:?}");
    let found_decoy = ids.contains(&"b");
    assert!(
        !found_decoy,
        "wildcard-style match leaked through regex escape"
    );
}
/// Merging a grep lane into the fused ranking must keep the fused ids, add the
/// grep-only id, and leave the best fused hit in first place.
#[test]
fn test_merge_grep_lane_appends_new_ids() {
    use super::search::merge_grep_lane;

    let fused = vec![("a".to_string(), 0.05), ("b".to_string(), 0.04)];
    let grep_lane = vec![("c".to_string(), 0.001)];

    let merged = merge_grep_lane(fused, &grep_lane, 0.5, 10);
    let ids: Vec<&str> = merged.iter().map(|(id, _)| id.as_str()).collect();
    for expected in ["a", "b", "c"] {
        assert!(ids.contains(&expected));
    }
    assert_eq!(merged[0].0, "a");
}
/// A chunk under `src/legacy/` must still be returned but must rank below the
/// live chunk, and must carry an `archive_reason` starting with `path:`.
#[tokio::test]
async fn test_archive_downrank_demotes_deprecated_chunks() {
    let indexer = make_indexer();
    indexer
        .add_chunk(raw("live", "src/auth.rs", "fn authenticate_user_xyz() {}"))
        .await
        .unwrap();
    indexer
        .add_chunk(raw(
            "old",
            "src/legacy/auth_old.rs",
            "fn authenticate_user_xyz_old() {}",
        ))
        .await
        .unwrap();

    let query = SearchQuery {
        text: "authenticate_user_xyz".to_string(),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = indexer.search(&query).await.unwrap();

    // Both chunks must be present, with the live one ranked ahead.
    let rank_of = |wanted: &str| results.iter().position(|c| c.id == wanted);
    let pos_live = rank_of("live");
    let pos_old = rank_of("old");
    assert!(pos_live.is_some(), "live chunk missing from results");
    assert!(pos_old.is_some(), "archived chunk missing from results");
    assert!(
        pos_live.unwrap() < pos_old.unwrap(),
        "live chunk should outrank archived chunk: live={pos_live:?} old={pos_old:?}"
    );

    // The archived chunk must explain why it was demoted.
    let old_chunk = &results[pos_old.unwrap()];
    let maybe_reason = old_chunk.archive_reason.as_deref();
    assert!(
        maybe_reason.is_some(),
        "archived chunk missing archive_reason: {:?}",
        old_chunk
    );
    let reason = maybe_reason.unwrap();
    assert!(
        reason.starts_with("path:"),
        "expected path-prefix reason, got {reason}"
    );
}
/// With `exclude_archived: true`, every chunk under an archive-style directory
/// must be dropped and only the live chunk returned; without the flag the
/// archived chunks must still appear.
#[tokio::test]
async fn test_exclude_archived_drops_archive_chunks() {
    // Archive-style locations covered by this test; all ids start with 'a'.
    const ARCHIVED: [(&str, &str); 5] = [
        ("a1", "src/_archive/auth.rs"),
        ("a2", "src/archive/auth.rs"),
        ("a3", "src/_deprecated/auth.rs"),
        ("a4", "src/old/auth.rs"),
        ("a5", "src/.archive/auth.rs"),
    ];

    let indexer = make_indexer();
    indexer
        .add_chunk(raw("live", "src/auth.rs", "fn authenticate_user_xyz() {}"))
        .await
        .unwrap();
    for (id, path) in ARCHIVED {
        indexer
            .add_chunk(raw(id, path, "fn authenticate_user_xyz_old() {}"))
            .await
            .unwrap();
    }

    // Flag left at its default: archived chunks are expected to be present.
    let default_query = SearchQuery {
        text: "authenticate_user_xyz".to_string(),
        top_k: 10,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let downranked = indexer.search(&default_query).await.unwrap();
    let archived_present = downranked.iter().any(|c| c.id.starts_with('a'));
    assert!(
        archived_present,
        "pre-condition: archived chunks should be present (downranked) without the flag"
    );

    // Flag set: only the live chunk may remain, and it must remain.
    let excluding_query = SearchQuery {
        text: "authenticate_user_xyz".to_string(),
        top_k: 10,
        expand_graph: false,
        compact: false,
        exclude_archived: true,
        ..Default::default()
    };
    let filtered = indexer.search(&excluding_query).await.unwrap();
    let only_live = filtered.iter().all(|c| c.id == "live");
    assert!(
        only_live,
        "exclude_archived must drop every archived chunk; got {:?}",
        filtered.iter().map(|c| &c.file).collect::<Vec<_>>()
    );
    let live_present = filtered.iter().any(|c| c.id == "live");
    assert!(live_present, "the live chunk must still be returned");
}
/// A chunk at an ordinary source path must come back without any
/// `archive_reason`.
#[tokio::test]
async fn test_archive_downrank_skips_clean_chunks() {
    let indexer = make_indexer();
    let clean = raw("clean", "src/main.rs", "fn run_main() {}");
    indexer.add_chunk(clean).await.unwrap();

    let query = SearchQuery {
        text: "run_main".to_string(),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = indexer.search(&query).await.unwrap();

    let hit = results.iter().find(|c| c.id == "clean").unwrap();
    assert!(hit.archive_reason.is_none());
}
/// The `start_line` / `end_line` stored on a chunk must be carried through to
/// the search result unchanged.
#[tokio::test]
async fn test_search_result_preserves_line_numbers() {
    let indexer = make_indexer();

    let mut chunk = raw("a", "src/a.rs", "fn alpha_qwerty_unique() {}");
    chunk.start_line = 42;
    chunk.end_line = 50;
    indexer.add_chunk(chunk).await.unwrap();

    let query = SearchQuery {
        text: "alpha_qwerty_unique".to_string(),
        top_k: 5,
        expand_graph: false,
        compact: false,
        ..Default::default()
    };
    let results = indexer.search(&query).await.unwrap();
    assert!(!results.is_empty());

    let top = &results[0];
    assert_eq!(top.start_line, 42);
    assert_eq!(top.end_line, 50);
}
/// Seeds `idx` with one chunk per file category used by the mode-filter
/// tests: Rust source, markdown, an extension-less `LICENSE`, a TOML config
/// and a JSON fixture. Every chunk mentions `alpha_qwerty`.
async fn seed_mode_filter_corpus(idx: &CodeIndexer) {
    // (chunk id, file path, content), inserted in this order.
    let fixtures = [
        ("src:1", "src/lib.rs", "fn alpha_qwerty() -> bool { true }"),
        (
            "doc:1",
            "docs/intro.md",
            "# alpha_qwerty\nDocumentation about alpha_qwerty.",
        ),
        (
            "named:1",
            "LICENSE",
            "MIT licence text mentioning alpha_qwerty.",
        ),
        ("cfg:1", "Cargo.toml", "[package]\nname = \"alpha_qwerty\""),
        (
            "data:1",
            "fixtures/alpha.json",
            "{\"name\": \"alpha_qwerty\"}",
        ),
    ];
    for (id, file, content) in fixtures {
        idx.add_chunk(raw(id, file, content)).await.unwrap();
    }
}
/// `SearchMode::Code` must return the source file and exclude markdown, named
/// docs, config and data files.
#[tokio::test]
async fn test_mode_filter_code_returns_only_source() {
    let indexer = make_indexer();
    seed_mode_filter_corpus(&indexer).await;

    let query = SearchQuery {
        text: "alpha".to_string(),
        top_k: 20,
        expand_graph: false,
        compact: false,
        mode: SearchMode::Code,
        ..Default::default()
    };
    let results = indexer.search(&query).await.unwrap();
    let files: Vec<&str> = results.iter().map(|c| c.file.as_str()).collect();
    let any_with_ext = |ext: &str| files.iter().any(|f| f.ends_with(ext));

    assert!(
        files.contains(&"src/lib.rs"),
        "code mode must include source: {files:?}"
    );
    assert!(
        !any_with_ext(".md"),
        "code mode must exclude .md: {files:?}"
    );
    assert!(
        !files.contains(&"LICENSE"),
        "code mode must exclude named docs: {files:?}"
    );
    assert!(
        !any_with_ext(".toml"),
        "code mode must exclude config: {files:?}"
    );
    assert!(
        !any_with_ext(".json"),
        "code mode must exclude data: {files:?}"
    );
}
/// `SearchMode::Text` must return markdown and the extension-less `LICENSE`,
/// and exclude source, config and data files.
#[tokio::test]
async fn test_mode_filter_text_returns_only_prose_and_named_docs() {
    let indexer = make_indexer();
    seed_mode_filter_corpus(&indexer).await;

    let query = SearchQuery {
        text: "alpha_qwerty".to_string(),
        top_k: 20,
        expand_graph: false,
        compact: false,
        mode: SearchMode::Text,
        ..Default::default()
    };
    let results = indexer.search(&query).await.unwrap();
    let files: Vec<&str> = results.iter().map(|c| c.file.as_str()).collect();
    let any_with_ext = |ext: &str| files.iter().any(|f| f.ends_with(ext));

    assert!(
        any_with_ext(".md"),
        "text mode must include prose: {files:?}"
    );
    assert!(
        files.contains(&"LICENSE"),
        "text mode must include named docs without extension: {files:?}"
    );
    assert!(
        !any_with_ext(".rs"),
        "text mode must exclude source: {files:?}"
    );
    assert!(
        !any_with_ext(".toml"),
        "text mode must exclude config: {files:?}"
    );
    assert!(
        !any_with_ext(".json"),
        "text mode must exclude data: {files:?}"
    );
}
/// `SearchMode::Data` must return the TOML and JSON files, and exclude source,
/// markdown and named docs.
#[tokio::test]
async fn test_mode_filter_data_returns_only_structured_data() {
    let indexer = make_indexer();
    seed_mode_filter_corpus(&indexer).await;

    let query = SearchQuery {
        text: "alpha_qwerty".to_string(),
        top_k: 20,
        expand_graph: false,
        compact: false,
        mode: SearchMode::Data,
        ..Default::default()
    };
    let results = indexer.search(&query).await.unwrap();
    let files: Vec<&str> = results.iter().map(|c| c.file.as_str()).collect();
    let any_with_ext = |ext: &str| files.iter().any(|f| f.ends_with(ext));

    assert!(
        any_with_ext(".toml"),
        "data mode must include config: {files:?}"
    );
    assert!(
        any_with_ext(".json"),
        "data mode must include data files: {files:?}"
    );
    assert!(
        !any_with_ext(".rs"),
        "data mode must exclude source: {files:?}"
    );
    assert!(
        !any_with_ext(".md"),
        "data mode must exclude prose: {files:?}"
    );
    assert!(
        !files.contains(&"LICENSE"),
        "data mode must exclude named docs: {files:?}"
    );
}
/// `SearchMode::All` must return every seeded file regardless of category.
#[tokio::test]
async fn test_mode_filter_all_returns_everything() {
    let indexer = make_indexer();
    seed_mode_filter_corpus(&indexer).await;

    let query = SearchQuery {
        text: "alpha_qwerty".to_string(),
        top_k: 20,
        expand_graph: false,
        compact: false,
        mode: SearchMode::All,
        ..Default::default()
    };
    let results = indexer.search(&query).await.unwrap();
    let files: Vec<&str> = results.iter().map(|c| c.file.as_str()).collect();

    let every_seeded_file = [
        "src/lib.rs",
        "docs/intro.md",
        "LICENSE",
        "Cargo.toml",
        "fixtures/alpha.json",
    ];
    for expected in every_seeded_file {
        assert!(
            files.contains(&expected),
            "all mode must include {expected}: {files:?}"
        );
    }
}
/// `idle_evict_secs()` must return the compiled-in default when
/// `TRUSTY_CHUNKS_IDLE_EVICT_SECS` is unset or unparseable, and the parsed
/// value (including `0`) when it holds a number.
///
/// The variable's prior value is restored by a drop guard, so a failing
/// assertion cannot leak the mutated environment into the rest of the test
/// process.
#[test]
fn idle_evict_secs_default_and_env_override() {
    const VAR: &str = "TRUSTY_CHUNKS_IDLE_EVICT_SECS";

    /// Puts `VAR` back to the captured value (or removes it) when dropped,
    /// including during a panic unwind.
    struct RestoreEnv(Option<String>);

    impl Drop for RestoreEnv {
        fn drop(&mut self) {
            // SAFETY: same requirement as the mutations in the test body below.
            unsafe {
                match self.0.take() {
                    Some(v) => std::env::set_var(VAR, v),
                    None => std::env::remove_var(VAR),
                }
            }
        }
    }

    let _restore = RestoreEnv(std::env::var(VAR).ok());

    // SAFETY (applies to every block below): mutating the process environment
    // is only sound while no other thread reads or writes it concurrently.
    // NOTE(review): the default test harness runs tests on multiple threads,
    // so this relies on no other test touching the environment at the same
    // time — confirm, or serialize the env-dependent tests.
    unsafe { std::env::remove_var(VAR) };
    assert_eq!(idle_evict_secs(), DEFAULT_CHUNKS_IDLE_EVICT_SECS);

    unsafe { std::env::set_var(VAR, "30") };
    assert_eq!(idle_evict_secs(), 30);

    // Zero is a valid override, not a fallback to the default.
    unsafe { std::env::set_var(VAR, "0") };
    assert_eq!(idle_evict_secs(), 0);

    // A non-numeric value falls back to the default.
    unsafe { std::env::set_var(VAR, "nope") };
    assert_eq!(idle_evict_secs(), DEFAULT_CHUNKS_IDLE_EVICT_SECS);
}
// End-to-end check of idle eviction on a corpus-backed indexer: eviction
// empties the in-memory chunk map and sets the `chunks_evicted` flag, the redb
// corpus keeps the chunks, and the next snapshot read repopulates the map and
// clears the flag.
#[tokio::test]
async fn idle_eviction_drops_and_lazily_rehydrates_chunks() {
let dir = tempfile::tempdir().unwrap();
let redb_path = dir.path().join("index.redb");
let idx = make_indexer_with_corpus(&redb_path);
idx.index_files_batch(&[
("src/auth.rs".into(), "fn authenticate() {}".into()),
("src/token.rs".into(), "fn verify_token() {}".into()),
])
.await
.expect("index batch");
let resident_before = idx.in_memory_chunk_count().await;
assert!(resident_before >= 2, "expected >= 2 resident chunks");
// A zero idle threshold is expected to evict nothing.
// NOTE(review): presumably zero means "eviction disabled" — confirm.
assert_eq!(idx.evict_chunks_if_idle(std::time::Duration::ZERO).await, 0);
assert_eq!(idx.in_memory_chunk_count().await, resident_before);
// A one-hour threshold is expected to evict nothing either
// (presumably because the chunks were touched moments ago).
assert_eq!(
idx.evict_chunks_if_idle(std::time::Duration::from_secs(3600))
.await,
0
);
assert_eq!(idx.in_memory_chunk_count().await, resident_before);
// A one-nanosecond threshold must evict every resident chunk.
let evicted = idx
.evict_chunks_if_idle(std::time::Duration::from_nanos(1))
.await;
assert_eq!(evicted, resident_before, "eviction should drop every chunk");
assert_eq!(
idx.in_memory_chunk_count().await,
0,
"map must be empty after eviction"
);
assert!(
idx.chunks_evicted.load(Ordering::Relaxed),
"chunks_evicted flag must be set after eviction"
);
// Eviction only affects memory: the durable corpus still holds the chunks.
assert!(idx.corpus_store().unwrap().chunk_count().unwrap() >= 2);
// Taking a snapshot must bring the evicted chunks back.
let snapshot = idx.raw_chunks_snapshot().await;
assert_eq!(
snapshot.len(),
resident_before,
"raw_chunks_snapshot must rehydrate the evicted map"
);
assert_eq!(
idx.in_memory_chunk_count().await,
resident_before,
"map must be repopulated after a read"
);
assert!(
!idx.chunks_evicted.load(Ordering::Relaxed),
"chunks_evicted flag must clear after rehydration"
);
}
/// An indexer built by `make_indexer` (no corpus store attached) must never
/// evict: the chunk stays resident and the `chunks_evicted` flag stays unset.
#[tokio::test]
async fn idle_eviction_skips_indexers_without_corpus() {
    let indexer = make_indexer();
    indexer
        .add_chunk(raw("a", "src/a.rs", "fn a() {}"))
        .await
        .unwrap();

    let before = indexer.in_memory_chunk_count().await;
    assert_eq!(before, 1);

    // Even the smallest non-zero threshold must not drop anything here.
    let threshold = std::time::Duration::from_nanos(1);
    let evicted = indexer.evict_chunks_if_idle(threshold).await;
    assert_eq!(evicted, 0, "must not evict without a durable corpus");
    assert_eq!(indexer.in_memory_chunk_count().await, before);

    let flagged = indexer.chunks_evicted.load(Ordering::Relaxed);
    assert!(!flagged);
}