use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Instant;
use rayon::prelude::*;
use tracing::{info_span, instrument, trace, warn};
use crate::backend::{EmbedBackend, Encoding};
use crate::chunk::{ChunkConfig, CodeChunk};
/// Batch size used by [`SearchConfig::default`].
pub const DEFAULT_BATCH_SIZE: usize = 32;
/// File count at or above which `embed_all` switches from the in-memory batch
/// pipeline to the streaming pipeline.
const STREAMING_THRESHOLD: usize = 1000;
/// Capacity of the bounded channel that carries tokenized batches from the
/// tokenizer thread to the embedding loop in the streaming pipeline.
const RING_SIZE: usize = 4;
/// Options controlling chunking, tokenization, embedding and ranking for
/// `embed_all` and `search`.
#[derive(Debug, Clone)]
pub struct SearchConfig {
/// Number of encodings handed to a backend per batch; values below 1 are
/// treated as 1 by the pipelines in this file.
pub batch_size: usize,
/// When greater than zero, every encoding is truncated to at most this many
/// tokens after tokenization; `0` disables the extra truncation.
pub max_tokens: usize,
/// Chunking options, forwarded to `crate::chunk::chunk_source_for_path`.
pub chunk: ChunkConfig,
/// Forwarded to `crate::chunk::chunk_source_for_path`.
/// NOTE(review): presumably selects plain-text chunking — confirm in `crate::chunk`.
pub text_mode: bool,
/// Forwarded to `crate::hybrid::HybridIndex::new`.
/// NOTE(review): exact meaning is defined by `HybridIndex` — confirm there.
pub cascade_dim: Option<usize>,
/// Optional filter forwarded to `crate::walk::collect_files`.
pub file_type: Option<String>,
/// Search mode forwarded to the hybrid index; in `Keyword` mode `search`
/// skips embedding the query and passes an all-zero vector instead.
pub mode: crate::hybrid::SearchMode,
}
impl Default for SearchConfig {
    /// Builds the baseline configuration: `DEFAULT_BATCH_SIZE` batches, no
    /// extra token truncation, default chunking, no file-type filter, no
    /// cascade dimension, and hybrid search mode.
    fn default() -> Self {
        let chunk = ChunkConfig::default();
        let mode = crate::hybrid::SearchMode::Hybrid;
        Self {
            batch_size: DEFAULT_BATCH_SIZE,
            max_tokens: 0,
            chunk,
            text_mode: false,
            cascade_dim: None,
            file_type: None,
            mode,
        }
    }
}
/// One ranked hit produced by `search`.
#[derive(Debug, Clone)]
pub struct SearchResult {
/// The chunk that matched.
pub chunk: CodeChunk,
/// Score assigned by the hybrid index; `apply_structural_boost` may later
/// overwrite it with a normalized, boosted value.
pub similarity: f32,
}
/// Walks `root`, chunks every collected file and embeds all chunks.
///
/// Small trees (fewer than `STREAMING_THRESHOLD` files) go through the
/// in-memory batch pipeline; larger trees use the streaming pipeline.
///
/// # Errors
///
/// Returns an error when `backends` is empty or when the selected pipeline
/// fails.
#[instrument(skip_all, fields(root = %root.display(), batch_size = cfg.batch_size))]
pub fn embed_all(
    root: &Path,
    backends: &[&dyn EmbedBackend],
    tokenizer: &tokenizers::Tokenizer,
    cfg: &SearchConfig,
    profiler: &crate::profile::Profiler,
) -> crate::Result<(Vec<CodeChunk>, Vec<Vec<f32>>)> {
    if backends.is_empty() {
        return Err(crate::Error::Other(anyhow::anyhow!(
            "no embedding backends provided"
        )));
    }

    let files = {
        let _span = info_span!("walk").entered();
        let guard = profiler.phase("walk");
        let collected = crate::walk::collect_files(root, cfg.file_type.as_deref());
        guard.set_detail(format!("{} files", collected.len()));
        collected
    };

    if files.len() < STREAMING_THRESHOLD {
        return embed_all_batch(&files, backends, tokenizer, cfg, profiler);
    }

    // Total on-disk size feeds byte-based progress reporting; files whose
    // metadata cannot be read simply contribute nothing.
    let mut total_bytes: u64 = 0;
    for meta in files.iter().filter_map(|path| path.metadata().ok()) {
        total_bytes += meta.len();
    }
    embed_all_streaming(&files, total_bytes, backends, tokenizer, cfg, profiler)
}
/// In-memory pipeline: chunk every file, tokenize every chunk, order the
/// chunks by descending token count, embed them, and drop chunks that ended
/// up with an empty embedding (e.g. because tokenization failed).
fn embed_all_batch(
    files: &[std::path::PathBuf],
    backends: &[&dyn EmbedBackend],
    tokenizer: &tokenizers::Tokenizer,
    cfg: &SearchConfig,
    profiler: &crate::profile::Profiler,
) -> crate::Result<(Vec<CodeChunk>, Vec<Vec<f32>>)> {
    let chunks: Vec<CodeChunk> = {
        let _span = info_span!("chunk", file_count = files.len()).entered();
        let started = Instant::now();
        let text_mode = cfg.text_mode;
        let collected: Vec<CodeChunk> = files
            .par_iter()
            .flat_map(|path| match read_source(path) {
                // Unreadable, binary or non-UTF-8 files contribute no chunks.
                None => Vec::new(),
                Some(source) => {
                    let file_chunks = crate::chunk::chunk_source_for_path(
                        path,
                        &source,
                        text_mode,
                        &cfg.chunk,
                    );
                    profiler.chunk_thread_report(file_chunks.len());
                    profiler.chunk_batch(&file_chunks);
                    file_chunks
                }
            })
            .collect();
        profiler.chunk_summary(collected.len(), files.len(), started.elapsed());
        collected
    };

    let bs = cfg.batch_size.max(1);
    let max_tokens_cfg = cfg.max_tokens;
    let model_max = backends[0].max_tokens();
    let _span = info_span!("embed_chunks", chunk_count = chunks.len(), batch_size = bs).entered();
    profiler.embed_begin(chunks.len());

    // A failed tokenization is logged and recorded as `None` so the chunk
    // keeps its slot until the final filter removes it.
    let encodings: Vec<Option<Encoding>> = chunks
        .par_iter()
        .map(|chunk| {
            match tokenize(
                &chunk.enriched_content,
                tokenizer,
                max_tokens_cfg,
                model_max,
            ) {
                Ok(encoding) => Some(encoding),
                Err(e) => {
                    warn!(file = %chunk.file_path, err = %e, "tokenization failed, skipping chunk");
                    None
                }
            }
        })
        .collect();

    // Longest sequences first (stable sort); untokenized chunks count as
    // length 0 and therefore sink to the end.
    let mut pairs: Vec<(CodeChunk, Option<Encoding>)> =
        chunks.into_iter().zip(encodings).collect();
    pairs.sort_by_key(|(_, encoding)| {
        std::cmp::Reverse(encoding.as_ref().map_or(0, |e| e.input_ids.len()))
    });
    let (ordered_chunks, ordered_encodings): (Vec<CodeChunk>, Vec<Option<Encoding>>) =
        pairs.into_iter().unzip();

    let vectors = embed_distributed(&ordered_encodings, backends, bs, profiler)?;
    profiler.embed_done();

    let (kept_chunks, kept_vectors): (Vec<_>, Vec<_>) = ordered_chunks
        .into_iter()
        .zip(vectors)
        .filter(|(_, vector)| !vector.is_empty())
        .unzip();
    Ok((kept_chunks, kept_vectors))
}
/// Streaming pipeline used for large trees: chunking, tokenization and
/// embedding run concurrently, connected by bounded channels.
///
/// Three stages cooperate inside one thread scope:
///
/// 1. a chunker thread reads and chunks `files` in parallel and sends each
///    chunk over `chunk_tx`;
/// 2. a tokenizer thread groups tokenized chunks into batches of `bs`
///    (sorted by descending token count) and sends them over `batch_tx`;
/// 3. the calling thread embeds each batch as it arrives.
///
/// `total_bytes` is only used for byte-based progress reporting.
///
/// # Errors
///
/// Returns the first embedding error, an error reported by the tokenizer
/// thread, or an error if the tokenizer thread panicked.
#[expect(
    clippy::too_many_lines,
    reason = "streaming pipeline has inherent complexity in thread coordination"
)]
fn embed_all_streaming(
    files: &[std::path::PathBuf],
    total_bytes: u64,
    backends: &[&dyn EmbedBackend],
    tokenizer: &tokenizers::Tokenizer,
    cfg: &SearchConfig,
    profiler: &crate::profile::Profiler,
) -> crate::Result<(Vec<CodeChunk>, Vec<Vec<f32>>)> {
    use crossbeam_channel::bounded;
    let bs = cfg.batch_size.max(1);
    let max_tokens_cfg = cfg.max_tokens;
    let model_max = backends[0].max_tokens();
    let file_count = files.len();
    let text_mode = cfg.text_mode;
    let chunk_config = cfg.chunk.clone();
    // Bounded channels provide back-pressure between the stages.
    let (chunk_tx, chunk_rx) = bounded::<CodeChunk>(bs * 8);
    let (batch_tx, batch_rx) = bounded::<Vec<(Encoding, CodeChunk)>>(RING_SIZE);
    let total_chunks_produced = AtomicUsize::new(0);
    let bytes_chunked = AtomicUsize::new(0);
    let chunk_start = Instant::now();
    std::thread::scope(|scope| {
        // Stage 1: read + chunk files in parallel.
        scope.spawn(|| {
            let _span = info_span!("chunk_stream", file_count).entered();
            files.par_iter().for_each(|path| {
                let Some(source) = read_source(path) else {
                    return;
                };
                let chunks =
                    crate::chunk::chunk_source_for_path(path, &source, text_mode, &chunk_config);
                let n = chunks.len();
                let file_bytes = source.len();
                profiler.chunk_batch(&chunks);
                for chunk in chunks {
                    // A send error means the tokenizer has gone away; stop
                    // working on this file.
                    if chunk_tx.send(chunk).is_err() {
                        return;
                    }
                }
                profiler.chunk_thread_report(n);
                total_chunks_produced.fetch_add(n, Ordering::Relaxed);
                bytes_chunked.fetch_add(file_bytes, Ordering::Relaxed);
            });
            // Closing the sender lets the tokenizer's receive loop terminate.
            drop(chunk_tx);
        });
        // Stage 2: tokenize chunks and group them into sorted batches.
        let tokenize_handle = scope.spawn(move || -> crate::Result<()> {
            let _span = info_span!("tokenize_stream").entered();
            let mut buffer: Vec<(Encoding, CodeChunk)> = Vec::with_capacity(bs);
            for chunk in &chunk_rx {
                match tokenize(
                    &chunk.enriched_content,
                    tokenizer,
                    max_tokens_cfg,
                    model_max,
                ) {
                    Ok(encoding) => {
                        buffer.push((encoding, chunk));
                        if buffer.len() >= bs {
                            buffer.sort_by_key(|b| std::cmp::Reverse(b.0.input_ids.len()));
                            let batch = std::mem::replace(&mut buffer, Vec::with_capacity(bs));
                            // The embedding loop hung up (error path): stop quietly.
                            if batch_tx.send(batch).is_err() {
                                return Ok(());
                            }
                        }
                    }
                    Err(e) => {
                        warn!(
                            file = %chunk.file_path, err = %e,
                            "tokenization failed, skipping chunk"
                        );
                    }
                }
            }
            // Flush the final, possibly short, batch.
            if !buffer.is_empty() {
                buffer.sort_by_key(|b| std::cmp::Reverse(b.0.input_ids.len()));
                let _ = batch_tx.send(buffer);
            }
            Ok(())
        });
        // Stage 3: embed batches on the calling thread.
        let _span = info_span!("embed_stream").entered();
        profiler.embed_begin(0);
        let mut all_chunks: Vec<CodeChunk> = Vec::new();
        let mut all_embeddings: Vec<Vec<f32>> = Vec::new();
        let mut embed_error: Option<crate::Error> = None;
        let mut cumulative_done: usize = 0;
        for batch in &batch_rx {
            let batch_len = batch.len();
            let (encodings, chunks): (Vec<Encoding>, Vec<CodeChunk>) = batch.into_iter().unzip();
            let opt_encodings: Vec<Option<Encoding>> = encodings.into_iter().map(Some).collect();
            // Progress is reported here with cumulative totals, so the
            // per-batch call gets a no-op profiler.
            let noop = crate::profile::Profiler::noop();
            match embed_distributed(&opt_encodings, backends, bs, &noop) {
                Ok(batch_embeddings) => {
                    profiler.embedding_batch(&batch_embeddings);
                    cumulative_done += batch_len;
                    let processed = bytes_chunked.load(Ordering::Relaxed) as u64;
                    profiler.embed_tick_bytes(cumulative_done, processed, total_bytes);
                    for (chunk, emb) in chunks.into_iter().zip(batch_embeddings) {
                        if !emb.is_empty() {
                            all_chunks.push(chunk);
                            all_embeddings.push(emb);
                        }
                    }
                }
                Err(e) => {
                    embed_error = Some(e);
                    break;
                }
            }
        }
        // Disconnect the batch channel before joining the tokenizer. After an
        // embedding error the loop above stops draining `batch_rx`; if the
        // receiver stayed alive, the tokenizer would block forever in
        // `batch_tx.send` once the bounded channel filled up and the join
        // below would deadlock. With the receiver gone, `send` fails, the
        // tokenizer returns, `chunk_rx` is dropped and the chunker winds down.
        drop(batch_rx);
        let final_total = total_chunks_produced.load(Ordering::Relaxed);
        profiler.chunk_summary(final_total, file_count, chunk_start.elapsed());
        profiler.embed_begin_update_total(cumulative_done);
        profiler.embed_tick(cumulative_done);
        profiler.embed_done();
        let tokenize_result = tokenize_handle.join();
        // An embedding error takes precedence over tokenizer-thread failures.
        if let Some(e) = embed_error {
            return Err(e);
        }
        match tokenize_result {
            Ok(Ok(())) => {}
            Ok(Err(e)) => return Err(e),
            Err(_) => {
                return Err(crate::Error::Other(anyhow::anyhow!(
                    "tokenize thread panicked"
                )));
            }
        }
        Ok((all_chunks, all_embeddings))
    })
}
#[instrument(skip_all, fields(root = %root.display(), top_k, batch_size = cfg.batch_size))]
pub fn search(
root: &Path,
query: &str,
backends: &[&dyn EmbedBackend],
tokenizer: &tokenizers::Tokenizer,
top_k: usize,
cfg: &SearchConfig,
profiler: &crate::profile::Profiler,
) -> crate::Result<Vec<SearchResult>> {
if backends.is_empty() {
return Err(crate::Error::Other(anyhow::anyhow!(
"no embedding backends provided"
)));
}
let (chunks, embeddings) = embed_all(root, backends, tokenizer, cfg, profiler)?;
let t_query_start = std::time::Instant::now();
let hybrid = {
let _span = info_span!("build_hybrid_index").entered();
let _guard = profiler.phase("build_hybrid_index");
crate::hybrid::HybridIndex::new(chunks, &embeddings, cfg.cascade_dim)?
};
let mode = cfg.mode;
let effective_top_k = if top_k > 0 { top_k } else { usize::MAX };
let query_embedding = if mode == crate::hybrid::SearchMode::Keyword {
let dim = hybrid.semantic.hidden_dim;
vec![0.0f32; dim]
} else {
let _span = info_span!("embed_query").entered();
let _guard = profiler.phase("embed_query");
let t_tok = std::time::Instant::now();
let enc = tokenize(query, tokenizer, cfg.max_tokens, backends[0].max_tokens())?;
let tok_ms = t_tok.elapsed().as_secs_f64() * 1000.0;
let t_emb = std::time::Instant::now();
let mut results = backends[0].embed_batch(&[enc])?;
let emb_ms = t_emb.elapsed().as_secs_f64() * 1000.0;
eprintln!(
"[search] query: tokenize={tok_ms:.1}ms embed={emb_ms:.1}ms total_since_embed_all={:.1}ms",
t_query_start.elapsed().as_secs_f64() * 1000.0
);
results.pop().ok_or_else(|| {
crate::Error::Other(anyhow::anyhow!("backend returned no embedding for query"))
})?
};
let ranked = {
let _span = info_span!("rank", chunk_count = hybrid.chunks().len()).entered();
let guard = profiler.phase("rank");
let threshold = 0.0; let results = hybrid.search(&query_embedding, query, effective_top_k, threshold, mode);
guard.set_detail(format!(
"{mode} top {} from {}",
effective_top_k.min(results.len()),
hybrid.chunks().len()
));
results
};
let results: Vec<SearchResult> = ranked
.into_iter()
.map(|(idx, score)| SearchResult {
chunk: hybrid.chunks()[idx].clone(),
similarity: score,
})
.collect();
Ok(results)
}
/// State shared by all embedding workers spawned from `embed_distributed`.
struct DistributedState<'a> {
/// The encodings to embed; `None` entries yield an empty embedding.
tokenized: &'a [Option<Encoding>],
/// Index of the next unclaimed encoding; workers advance it atomically to
/// claim a contiguous range.
cursor: std::sync::atomic::AtomicUsize,
/// Set once any worker fails so the others stop claiming new work.
error_flag: std::sync::atomic::AtomicBool,
/// The first backend error recorded by any worker.
first_error: std::sync::Mutex<Option<crate::Error>>,
/// Running count of processed encodings, reported to the profiler.
done_counter: std::sync::atomic::AtomicUsize,
/// Base number of encodings a worker claims at a time (GPU workers claim
/// four times as many).
batch_size: usize,
/// Receives progress ticks and per-batch embedding reports.
profiler: &'a crate::profile::Profiler,
}
impl DistributedState<'_> {
    /// Worker loop: repeatedly claims the next range of encodings from the
    /// shared cursor, embeds it with `backend`, and collects
    /// `(index, embedding)` pairs.
    ///
    /// Slots holding `None` produce an empty embedding. The loop ends when the
    /// input is exhausted, when another worker has raised the error flag, or
    /// when this worker's backend call fails (the first such error is stored
    /// in `first_error`).
    fn run_worker(&self, backend: &dyn EmbedBackend) -> Vec<(usize, Vec<f32>)> {
        use std::sync::atomic::Ordering;

        let total = self.tokenized.len();
        // GPU backends claim four batches' worth of work per grab.
        let stride = if backend.is_gpu() {
            self.batch_size * 4
        } else {
            self.batch_size
        };
        let mut collected = Vec::new();

        while !self.error_flag.load(Ordering::Relaxed) {
            let lo = self.cursor.fetch_add(stride, Ordering::Relaxed);
            if lo >= total {
                break;
            }
            let hi = (lo + stride).min(total);
            let window = &self.tokenized[lo..hi];

            // Split the window into real encodings (with their global
            // positions) and empty placeholders.
            let mut encodings = Vec::with_capacity(window.len());
            let mut positions = Vec::with_capacity(window.len());
            for (offset, slot) in window.iter().enumerate() {
                match slot {
                    Some(encoding) => {
                        encodings.push(encoding.clone());
                        positions.push(lo + offset);
                    }
                    None => collected.push((lo + offset, vec![])),
                }
            }

            if !encodings.is_empty() {
                match backend.embed_batch(&encodings) {
                    Ok(vectors) => {
                        self.profiler.embedding_batch(&vectors);
                        for (position, vector) in positions.into_iter().zip(vectors) {
                            collected.push((position, vector));
                        }
                    }
                    Err(e) => {
                        self.error_flag.store(true, Ordering::Relaxed);
                        if let Ok(mut slot) = self.first_error.lock()
                            && slot.is_none()
                        {
                            *slot = Some(e);
                        }
                        break;
                    }
                }
            }

            // Progress counts the whole window, including placeholder slots.
            let done = self.done_counter.fetch_add(window.len(), Ordering::Relaxed) + window.len();
            self.profiler.embed_tick(done);
        }

        collected
    }
}
#[expect(
unsafe_code,
reason = "BLAS thread count must be set via env vars before spawning workers"
)]
pub(crate) fn embed_distributed(
tokenized: &[Option<Encoding>],
backends: &[&dyn EmbedBackend],
batch_size: usize,
profiler: &crate::profile::Profiler,
) -> crate::Result<Vec<Vec<f32>>> {
let n = tokenized.len();
let state = DistributedState {
tokenized,
cursor: std::sync::atomic::AtomicUsize::new(0),
error_flag: std::sync::atomic::AtomicBool::new(false),
first_error: std::sync::Mutex::new(None),
done_counter: std::sync::atomic::AtomicUsize::new(0),
batch_size: batch_size.max(1),
profiler,
};
let all_pairs: Vec<(usize, Vec<f32>)> =
if backends.len() == 1 && backends[0].supports_clone() && !backends[0].is_gpu() {
unsafe {
std::env::set_var("OPENBLAS_NUM_THREADS", "1");
std::env::set_var("MKL_NUM_THREADS", "1");
std::env::set_var("VECLIB_MAXIMUM_THREADS", "1");
#[cfg(all(not(target_os = "macos"), feature = "cpu"))]
{
unsafe extern "C" {
fn openblas_set_num_threads(num: std::ffi::c_int);
}
openblas_set_num_threads(1);
}
}
let num_workers = rayon::current_num_threads().max(1);
std::thread::scope(|s| {
let handles: Vec<_> = (0..num_workers)
.map(|_| {
s.spawn(|| {
#[cfg(any(feature = "cpu", feature = "cpu-accelerate"))]
crate::backend::driver::cpu::force_single_threaded_blas();
let cloned = backends[0].clone_backend();
state.run_worker(cloned.as_ref())
})
})
.collect();
let mut all = Vec::new();
for handle in handles {
if let Ok(pairs) = handle.join() {
all.extend(pairs);
}
}
all
})
} else if backends.len() == 1 {
state.run_worker(backends[0])
} else {
std::thread::scope(|s| {
let handles: Vec<_> = backends
.iter()
.map(|&backend| {
s.spawn(|| {
if backend.supports_clone() {
let cloned = backend.clone_backend();
state.run_worker(cloned.as_ref())
} else {
state.run_worker(backend)
}
})
})
.collect();
let mut all = Vec::new();
for handle in handles {
if let Ok(pairs) = handle.join() {
all.extend(pairs);
} else {
warn!("worker thread panicked");
state
.error_flag
.store(true, std::sync::atomic::Ordering::Relaxed);
}
}
all
})
};
if let Some(err) = state.first_error.into_inner().ok().flatten() {
return Err(err);
}
let mut embeddings: Vec<Vec<f32>> = vec![vec![]; n];
for (idx, emb) in all_pairs {
embeddings[idx] = emb;
}
Ok(embeddings)
}
/// Reads `path` and returns its contents as text.
///
/// Returns `None` (after logging at trace level) when the file cannot be
/// read, when a NUL byte appears within the first 8192 bytes (treated as a
/// binary file), or when the contents are not valid UTF-8.
pub(crate) fn read_source(path: &Path) -> Option<String> {
    let bytes = match std::fs::read(path) {
        Ok(b) => b,
        Err(e) => {
            trace!(path = %path.display(), err = %e, "skipping file: read failed");
            return None;
        }
    };
    // Only a bounded prefix is sniffed so large files stay cheap to classify.
    if memchr::memchr(0, &bytes[..bytes.len().min(8192)]).is_some() {
        trace!(path = %path.display(), "skipping binary file");
        return None;
    }
    // Validate in place and take ownership of the buffer instead of copying
    // the whole file into a second allocation.
    match String::from_utf8(bytes) {
        Ok(text) => Some(text),
        Err(e) => {
            trace!(path = %path.display(), err = %e, "skipping file: not valid UTF-8");
            None
        }
    }
}
/// Tokenizes `text` via `crate::tokenize::tokenize_query` (bounded by
/// `model_max_tokens`) and, when `max_tokens` is non-zero, additionally
/// truncates the encoding's id, mask and type-id vectors to at most
/// `max_tokens` entries.
///
/// # Errors
///
/// Propagates any error from `crate::tokenize::tokenize_query`.
fn tokenize(
    text: &str,
    tokenizer: &tokenizers::Tokenizer,
    max_tokens: usize,
    model_max_tokens: usize,
) -> crate::Result<Encoding> {
    let mut encoding = crate::tokenize::tokenize_query(text, tokenizer, model_max_tokens)?;
    // Zero means "no caller-imposed limit".
    if max_tokens == 0 {
        return Ok(encoding);
    }
    let keep = max_tokens.min(encoding.input_ids.len());
    encoding.input_ids.truncate(keep);
    encoding.attention_mask.truncate(keep);
    encoding.token_type_ids.truncate(keep);
    Ok(encoding)
}
/// Rescales the similarities in `results` to the `[0, 1]` range (min-max) and
/// adds `alpha` times each result's per-file rank from `file_ranks`.
///
/// Files missing from `file_ranks` get a rank of `0.0`. Does nothing when
/// `results` is empty or `alpha` is exactly zero.
pub fn apply_structural_boost<S: ::std::hash::BuildHasher>(
    results: &mut [SearchResult],
    file_ranks: &std::collections::HashMap<String, f32, S>,
    alpha: f32,
) {
    if alpha == 0.0 || results.is_empty() {
        return;
    }

    // One pass to find both extremes of the current scores.
    let (lowest, highest) = results.iter().fold(
        (f32::INFINITY, f32::NEG_INFINITY),
        |(lo, hi), hit| (lo.min(hit.similarity), hi.max(hit.similarity)),
    );
    // Guard against division by zero when all scores are equal.
    let spread = (highest - lowest).max(1e-12);

    for hit in results.iter_mut() {
        let scaled = (hit.similarity - lowest) / spread;
        let file_rank = file_ranks
            .get(&hit.chunk.file_path)
            .map_or(0.0, |rank| *rank);
        hit.similarity = scaled + alpha * file_rank;
    }
}
// Unit and integration tests for the search pipeline. Tests that load a real
// model are gated behind the `cpu` feature and/or `#[ignore]`.
#[cfg(test)]
mod tests {
use super::*;
// End-to-end smoke test: searching this crate's `src` directory with a real
// CPU backend succeeds and returns at least one hit.
#[test]
#[cfg(feature = "cpu")]
#[ignore = "loads model + embeds full source tree; run with `cargo test -- --ignored`"]
fn search_with_backend_trait() {
let backend = crate::backend::load_backend(
crate::backend::BackendKind::Cpu,
"BAAI/bge-small-en-v1.5",
crate::backend::DeviceHint::Cpu,
)
.unwrap();
let tokenizer = crate::tokenize::load_tokenizer("BAAI/bge-small-en-v1.5").unwrap();
let cfg = SearchConfig::default();
let profiler = crate::profile::Profiler::noop();
let dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("src");
let results = search(
&dir,
"embedding model",
&[backend.as_ref()],
&tokenizer,
1,
&cfg,
&profiler,
);
assert!(results.is_ok());
assert!(!results.unwrap().is_empty());
}
// `embed_distributed` returns exactly one embedding per input, each with the
// 384 dimensions this test expects from the model.
#[test]
#[cfg(feature = "cpu")]
fn embed_distributed_produces_correct_count() {
let backend = crate::backend::load_backend(
crate::backend::BackendKind::Cpu,
"BAAI/bge-small-en-v1.5",
crate::backend::DeviceHint::Cpu,
)
.unwrap();
let tokenizer = crate::tokenize::load_tokenizer("BAAI/bge-small-en-v1.5").unwrap();
let profiler = crate::profile::Profiler::noop();
let texts = ["fn hello() {}", "class Foo:", "func main() {}"];
let encoded: Vec<Option<Encoding>> = texts
.iter()
.map(|t| super::tokenize(t, &tokenizer, 0, 512).ok())
.collect();
let results =
super::embed_distributed(&encoded, &[backend.as_ref()], 32, &profiler).unwrap();
assert_eq!(results.len(), 3);
for (i, emb) in results.iter().enumerate() {
assert_eq!(emb.len(), 384, "embedding {i} should be 384-dim");
}
}
// Keeps the first `dims` components of `emb` and rescales them to unit L2
// norm (the norm is clamped to 1e-12 to avoid division by zero).
// Panics if `dims` exceeds `emb.len()`.
fn truncate_and_normalize(emb: &[f32], dims: usize) -> Vec<f32> {
let trunc = &emb[..dims];
let norm: f32 = trunc.iter().map(|x| x * x).sum::<f32>().sqrt().max(1e-12);
trunc.iter().map(|x| x / norm).collect()
}
// Returns the indices of the `k` corpus vectors with the highest dot product
// against `query`, best first.
fn rank_topk(query: &[f32], corpus: &[Vec<f32>], k: usize) -> Vec<usize> {
let mut scored: Vec<(usize, f32)> = corpus
.iter()
.enumerate()
.map(|(i, emb)| {
let dot: f32 = query.iter().zip(emb).map(|(a, b)| a * b).sum();
(i, dot)
})
.collect();
scored.sort_unstable_by(|a, b| b.1.total_cmp(&a.1));
scored.into_iter().take(k).map(|(i, _)| i).collect()
}
// Diagnostic (prints only, no assertions on recall): for several queries,
// compares the top-k ranking obtained from truncated embeddings against the
// ranking from full-dimension embeddings and reports the overlap.
#[test]
#[ignore = "loads model + embeds; run with --nocapture"]
#[expect(
clippy::cast_precision_loss,
reason = "top_k and overlap are small counts"
)]
fn mrl_retrieval_recall() {
let model = "BAAI/bge-small-en-v1.5";
let backends = crate::backend::detect_backends(model).unwrap();
let tokenizer = crate::tokenize::load_tokenizer(model).unwrap();
let cfg = SearchConfig::default();
let profiler = crate::profile::Profiler::noop();
// The corpus root is two directory levels above this crate's manifest.
let root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.parent()
.unwrap();
eprintln!("Embedding {}", root.display());
let backend_refs: Vec<&dyn crate::backend::EmbedBackend> =
backends.iter().map(std::convert::AsRef::as_ref).collect();
let (chunks, embeddings) =
embed_all(root, &backend_refs, &tokenizer, &cfg, &profiler).unwrap();
let full_dim = embeddings[0].len();
eprintln!(
"Corpus: {} chunks, {full_dim}-dim embeddings\n",
chunks.len()
);
let queries = [
"error handling in the embedding pipeline",
"tree-sitter chunking and AST parsing",
"Metal GPU kernel dispatch",
"file watcher for incremental reindex",
"cosine similarity ranking",
];
let top_k = 10;
// Candidate truncation sizes, limited to those the model can provide.
let mrl_dims: Vec<usize> = [32, 64, 128, 192, 256, full_dim]
.into_iter()
.filter(|&d| d <= full_dim)
.collect();
eprintln!("=== MRL Retrieval Recall@{top_k} (vs full {full_dim}-dim) ===\n");
for query in &queries {
let enc = tokenize(query, &tokenizer, 0, backends[0].max_tokens()).unwrap();
let query_emb = backends[0].embed_batch(&[enc]).unwrap().pop().unwrap();
// Reference ranking from the untruncated embeddings.
let ref_topk = rank_topk(&query_emb, &embeddings, top_k);
eprintln!("Query: \"{query}\"");
eprintln!(
" Full-dim top-1: {} ({})",
chunks[ref_topk[0]].name, chunks[ref_topk[0]].file_path
);
for &dims in &mrl_dims {
let trunc_corpus: Vec<Vec<f32>> = embeddings
.iter()
.map(|e| truncate_and_normalize(e, dims))
.collect();
let trunc_query = truncate_and_normalize(&query_emb, dims);
let trunc_topk = rank_topk(&trunc_query, &trunc_corpus, top_k);
// Recall = share of the reference top-k also present in the truncated top-k.
let overlap = ref_topk.iter().filter(|i| trunc_topk.contains(i)).count();
let recall = overlap as f32 / top_k as f32;
let marker = if dims == full_dim {
" (ref)"
} else if recall >= 0.8 {
" ***"
} else {
""
};
eprintln!(
" dims={dims:>3}: Recall@{top_k}={recall:.1} ({overlap}/{top_k}){marker}"
);
}
eprintln!();
}
}
// Builds a minimal `SearchResult` with the given path and score; all other
// chunk fields are placeholders.
fn make_result(file_path: &str, similarity: f32) -> SearchResult {
SearchResult {
chunk: CodeChunk {
file_path: file_path.to_string(),
name: "test".to_string(),
kind: "function".to_string(),
start_line: 1,
end_line: 10,
enriched_content: String::new(),
content: String::new(),
},
similarity,
}
}
// Scores 0.8/0.4/0.6 normalize to 1.0/0.0/0.5; adding 0.2 * rank
// (0.5/1.0/0.0) gives 1.1/0.2/0.5.
#[test]
fn structural_boost_normalizes_and_applies() {
let mut results = vec![
make_result("src/a.rs", 0.8),
make_result("src/b.rs", 0.4),
make_result("src/c.rs", 0.6),
];
let mut ranks = std::collections::HashMap::new();
ranks.insert("src/a.rs".to_string(), 0.5);
ranks.insert("src/b.rs".to_string(), 1.0);
ranks.insert("src/c.rs".to_string(), 0.0);
apply_structural_boost(&mut results, &ranks, 0.2);
assert!((results[0].similarity - 1.1).abs() < 1e-6);
assert!((results[1].similarity - 0.2).abs() < 1e-6);
assert!((results[2].similarity - 0.5).abs() < 1e-6);
}
// An empty result list is left untouched.
#[test]
fn structural_boost_noop_on_empty() {
let mut results: Vec<SearchResult> = vec![];
let ranks = std::collections::HashMap::new();
apply_structural_boost(&mut results, &ranks, 0.2);
assert!(results.is_empty());
}
// With `alpha == 0.0` the scores are not normalized or changed.
#[test]
fn structural_boost_noop_on_zero_alpha() {
let mut results = vec![make_result("src/a.rs", 0.8)];
let mut ranks = std::collections::HashMap::new();
ranks.insert("src/a.rs".to_string(), 1.0);
apply_structural_boost(&mut results, &ranks, 0.0);
assert!((results[0].similarity - 0.8).abs() < 1e-6);
}
}