use crate::config::Config;
use crate::indexer::branch::BranchManifest;
use crate::store::{CodeBlock, DocumentBlock, Store, TextBlock};
use anyhow::Result;
use std::collections::HashSet;
/// Search context for a branch-local index: a dedicated vector store plus
/// the manifest describing which paths the branch overrides relative to
/// the main index.
pub struct BranchSearchContext {
    pub store: Store,
    pub manifest: BranchManifest,
}
/// Parameters shared by the multi-query search helpers.
pub struct SearchParams<'a> {
    /// Search mode: "all", "code", "docs", "commits", or "text".
    pub mode: &'a str,
    /// Maximum number of results ultimately returned to the caller.
    pub max_results: usize,
    /// Minimum similarity (1.0 - distance) a result must reach.
    pub similarity_threshold: f32,
    /// Optional language name used to restrict code results.
    pub language_filter: Option<&'a str>,
    pub config: &'a Config,
    /// Branch-local search context, present when the current directory is
    /// on a branch with its own index.
    pub branch_ctx: Option<&'a BranchSearchContext>,
}
/// Produces a sorted, de-duplicated, comma-separated list of the given
/// symbols, omitting any symbol that contains an underscore.
fn format_symbols_for_display(symbols: &[String]) -> String {
    // A BTreeSet sorts and de-duplicates in a single pass, mirroring the
    // sort + dedup the display logic needs.
    let display: std::collections::BTreeSet<&str> = symbols
        .iter()
        .map(String::as_str)
        .filter(|s| !s.contains('_'))
        .collect();
    display.into_iter().collect::<Vec<_>>().join(", ")
}
/// Renders code blocks to stdout using the default configuration and the
/// "partial" detail level (abbreviated first/last lines per block).
pub fn render_code_blocks(blocks: &[CodeBlock]) {
    render_code_blocks_with_config(blocks, &Config::default(), "partial");
}
/// Renders code blocks to stdout in a boxed layout.
///
/// `detail_level` controls how much content is shown per block:
/// - "signatures": only the block's first line
/// - "full": the whole content, truncated to the configured character
///   budget when one is set
/// - "partial" (and any unrecognized value): everything for blocks of at
///   most 10 lines, otherwise the first 4 and last 3 lines with an
///   elision marker
pub fn render_code_blocks_with_config(blocks: &[CodeBlock], config: &Config, detail_level: &str) {
    if blocks.is_empty() {
        println!("No code blocks found for the query.");
        return;
    }
    println!("Found {} code blocks:\n", blocks.len());
    for (idx, block) in blocks.iter().enumerate() {
        println!(
            "╔══════════════════ File: {} ══════════════════",
            block.path
        );
        println!("║");
        println!("║ Result {} of {}", idx + 1, blocks.len());
        println!("║ Language: {}", block.language);
        println!("║ Lines: {}-{}", block.start_line, block.end_line);
        if let Some(distance) = block.distance {
            // Stored value is a distance; displayed value is a similarity.
            println!("║ Similarity: {:.4}", 1.0 - distance);
        }
        if !block.symbols.is_empty() {
            println!("║ Symbols:");
            let formatted = format_symbols_for_display(&block.symbols);
            if !formatted.is_empty() {
                for symbol in formatted.split(", ") {
                    println!("║ • {}", symbol);
                }
            }
        }
        println!("║ Content:");
        println!("║ ┌────────────────────────────────────");
        match detail_level {
            "signatures" => {
                // Only the first line, shown at the block's start line.
                if let Some(first_line) = block.content.lines().next() {
                    println!("║ │ {:4} │ {}", block.start_line, first_line.trim());
                }
            }
            "full" => {
                let max_chars = config.search.search_block_max_characters;
                if max_chars > 0 && block.content.len() > max_chars {
                    let (content, was_truncated) =
                        crate::indexer::truncate_content_smartly(&block.content, max_chars);
                    for (i, line) in content.lines().enumerate() {
                        println!("║ │ {:4} │ {}", block.start_line + i, line);
                    }
                    if was_truncated {
                        println!(
                            "║ │ │ [Content truncated - limit: {} chars]",
                            max_chars
                        );
                    }
                } else {
                    for (i, line) in block.content.lines().enumerate() {
                        println!("║ │ {:4} │ {}", block.start_line + i, line);
                    }
                }
            }
            // "partial" and any unrecognized level previously duplicated the
            // same rendering verbatim; both now share one helper.
            _ => render_partial_content(&block.content, block.start_line),
        }
        println!("║ └────────────────────────────────────");
        println!("╚════════════════════════════════════════\n");
    }
}

/// Prints an abbreviated view of `content`: everything when it is at most
/// 10 lines, otherwise the first 4 and last 3 lines around an elision
/// marker. Line `i` is numbered `start_line + i`, matching the "full" and
/// "signatures" renderings and the text formatters (the previous code
/// numbered these lines with an extra off-by-one `+ 1`).
fn render_partial_content(content: &str, start_line: usize) {
    let lines: Vec<&str> = content.lines().collect();
    if lines.len() <= 10 {
        for (i, line) in lines.iter().enumerate() {
            println!("║ │ {:4} │ {}", start_line + i, line);
        }
        return;
    }
    for (i, line) in lines.iter().take(4).enumerate() {
        println!("║ │ {:4} │ {}", start_line + i, line);
    }
    // 4 head lines + 3 tail lines are shown, so len - 7 lines are elided.
    let omitted_lines = lines.len() - 7;
    if omitted_lines > 0 {
        println!("║ │ │ ... ({} more lines)", omitted_lines);
    }
    let last_3_start = lines.len() - 3;
    for (i, line) in lines.iter().skip(last_3_start).enumerate() {
        println!("║ │ {:4} │ {}", start_line + last_3_start + i, line);
    }
}
/// Serializes the given code blocks as pretty-printed JSON and writes the
/// document to stdout.
pub fn render_results_json(results: &[CodeBlock]) -> Result<(), anyhow::Error> {
    println!("{}", serde_json::to_string_pretty(results)?);
    Ok(())
}
/// Expands code search results with additional blocks that define the
/// symbols referenced by the original results.
///
/// The original blocks are kept first, in their given order. Expansion
/// blocks are appended afterwards, ordered by how many collected symbols
/// they match (descending), with ties broken by path and start line.
/// Duplicate blocks (by content hash) are skipped.
///
/// # Errors
/// Propagates any store lookup failure.
pub async fn expand_symbols(
    store: &Store,
    code_blocks: Vec<CodeBlock>,
) -> Result<Vec<CodeBlock>, anyhow::Error> {
    let mut expanded_blocks = Vec::with_capacity(code_blocks.len());
    // Hashes already present in the output, used to reject duplicates.
    let mut seen_hashes = HashSet::new();
    for block in &code_blocks {
        expanded_blocks.push(block.clone());
        seen_hashes.insert(block.hash.clone());
    }
    // Candidate symbols: skip snake_case identifiers and anything that does
    // not start with a letter (same filter as format_symbols_for_display).
    let mut symbol_refs = Vec::new();
    for block in &code_blocks {
        for symbol in &block.symbols {
            if !symbol.contains('_') && symbol.chars().next().is_some_and(|c| c.is_alphabetic()) {
                symbol_refs.push(symbol.clone());
            }
        }
    }
    symbol_refs.sort();
    symbol_refs.dedup();
    println!("Found {} unique symbols to expand", symbol_refs.len());
    let mut additional_blocks = Vec::new();
    for symbol in &symbol_refs {
        if let Some(block) = store.get_code_block_by_symbol(symbol).await? {
            // `insert` returns false for already-seen hashes, giving an O(1)
            // duplicate check instead of re-scanning additional_blocks for
            // every candidate.
            if seen_hashes.insert(block.hash.clone()) {
                additional_blocks.push(block);
            }
        }
    }
    // Most relevant expansions first; path/start_line tie-breaks keep the
    // order deterministic.
    additional_blocks.sort_by(|a, b| {
        let a_matches = a.symbols.iter().filter(|s| symbol_refs.contains(s)).count();
        let b_matches = b.symbols.iter().filter(|s| symbol_refs.contains(s)).count();
        b_matches
            .cmp(&a_matches)
            .then_with(|| a.path.cmp(&b.path))
            .then_with(|| a.start_line.cmp(&b.start_line))
    });
    expanded_blocks.extend(additional_blocks);
    Ok(expanded_blocks)
}
/// Formats code search results as plain text at the given detail level
/// ("signatures", "partial", or "full"; any other value omits content).
pub fn format_code_search_results_as_text(blocks: &[CodeBlock], detail_level: &str) -> String {
    if blocks.is_empty() {
        return "No code results found.".to_string();
    }
    let mut out = format!("CODE RESULTS ({})\n", blocks.len());
    for (idx, block) in blocks.iter().enumerate() {
        out.push_str(&format!("{}. {}\n", idx + 1, block.path));
        if let Some(distance) = block.distance {
            out.push_str(&format!(" | Similarity {:.3}", 1.0 - distance));
        }
        out.push('\n');
        if !block.symbols.is_empty() {
            let symbols = format_symbols_for_display(&block.symbols);
            if !symbols.is_empty() {
                out.push_str(&format!("Symbols: {}\n", symbols));
            }
        }
        match detail_level {
            "signatures" => {
                // First line of the preview only.
                let preview =
                    get_code_preview_with_lines(&block.content, block.start_line, &block.language);
                if let Some(first_line) = preview.lines().next() {
                    out.push_str(first_line);
                    out.push('\n');
                }
            }
            "partial" => {
                let preview =
                    get_code_preview_with_lines(&block.content, block.start_line, &block.language);
                out.push_str(&preview);
                if !preview.ends_with('\n') {
                    out.push('\n');
                }
            }
            "full" => {
                let numbered = block
                    .content
                    .lines()
                    .enumerate()
                    .map(|(i, line)| format!("{}: {}", block.start_line + i, line))
                    .collect::<Vec<_>>()
                    .join("\n");
                out.push_str(&numbered);
                if !numbered.ends_with('\n') {
                    out.push('\n');
                }
            }
            _ => {}
        }
        // Blank line between entries.
        out.push('\n');
    }
    out
}
/// Formats plain-text search results at the given detail level
/// ("signatures", "partial", or "full"; any other value omits content).
pub fn format_text_search_results_as_text(
    blocks: &[crate::store::TextBlock],
    detail_level: &str,
) -> String {
    if blocks.is_empty() {
        return "No text results found.".to_string();
    }
    let mut out = format!("TEXT RESULTS ({})\n", blocks.len());
    for (idx, block) in blocks.iter().enumerate() {
        out.push_str(&format!("{}. {}\n", idx + 1, block.path));
        if let Some(distance) = block.distance {
            out.push_str(&format!(" | Similarity {:.3}", 1.0 - distance));
        }
        out.push('\n');
        match detail_level {
            "signatures" => {
                // First line of the preview only.
                let preview = get_text_preview_with_lines(&block.content, block.start_line);
                if let Some(first_line) = preview.lines().next() {
                    out.push_str(first_line);
                    out.push('\n');
                }
            }
            "partial" => {
                let preview = get_text_preview_with_lines(&block.content, block.start_line);
                out.push_str(&preview);
                if !preview.ends_with('\n') {
                    out.push('\n');
                }
            }
            "full" => {
                let numbered = block
                    .content
                    .lines()
                    .enumerate()
                    .map(|(i, line)| format!("{}: {}", block.start_line + i, line))
                    .collect::<Vec<_>>()
                    .join("\n");
                out.push_str(&numbered);
                if !numbered.ends_with('\n') {
                    out.push('\n');
                }
            }
            _ => {}
        }
        // Blank line between entries.
        out.push('\n');
    }
    out
}
/// Formats documentation search results at the given detail level. Each
/// entry shows path, title, heading level, line range, and (when present)
/// similarity, followed by a content preview.
pub fn format_doc_search_results_as_text(
    blocks: &[crate::store::DocumentBlock],
    detail_level: &str,
) -> String {
    if blocks.is_empty() {
        return "No documentation results found.".to_string();
    }
    let mut output = String::new();
    output.push_str(&format!("DOCUMENTATION RESULTS ({})\n", blocks.len()));
    for (idx, block) in blocks.iter().enumerate() {
        output.push_str(&format!("{}. {}\n", idx + 1, block.path));
        output.push_str(&format!("{} (Level {})", block.title, block.level));
        output.push_str(&format!(" | {}-{}", block.start_line, block.end_line));
        if let Some(distance) = block.distance {
            output.push_str(&format!(" | Similarity {:.3}", 1.0 - distance));
        }
        output.push('\n');
        match detail_level {
            "signatures" => {
                let preview = get_doc_preview_with_lines(&block.content, block.start_line);
                if !preview.is_empty() {
                    if let Some(first_line) = preview.lines().next() {
                        output.push_str(&format!("{}\n", first_line));
                    }
                }
            }
            "partial" => {
                let preview = get_doc_preview_with_lines(&block.content, block.start_line);
                output.push_str(&preview);
                if !preview.ends_with('\n') {
                    output.push('\n');
                }
            }
            "full" => {
                let content_with_lines = block
                    .content
                    .lines()
                    .enumerate()
                    .map(|(i, line)| format!("{}: {}", block.start_line + i, line))
                    .collect::<Vec<_>>()
                    .join("\n");
                output.push_str(&content_with_lines);
                if !content_with_lines.ends_with('\n') {
                    output.push('\n');
                }
            }
            _ => {}
        }
        // Blank line between entries — previously missing here, although the
        // code/text/commit formatters all emit it, so doc entries ran
        // together.
        output.push('\n');
    }
    output
}
/// Formats commit search results at the given detail level. "full" shows
/// the complete hash and message plus changed files and description;
/// "signatures" shows only the subject line; any other level shows the
/// subject plus files and description.
pub fn format_commit_search_results_as_text(
    blocks: &[crate::store::CommitBlock],
    detail_level: &str,
) -> String {
    if blocks.is_empty() {
        return "No commit results found.".to_string();
    }
    let mut out = format!("COMMIT RESULTS ({})\n", blocks.len());
    for (idx, block) in blocks.iter().enumerate() {
        // Abbreviate the hash to 8 characters unless full detail was asked for.
        let display_hash = match detail_level {
            "full" => block.hash.as_str(),
            _ => &block.hash[..8.min(block.hash.len())],
        };
        let date = chrono::DateTime::from_timestamp(block.date, 0)
            .map(|dt| dt.format("%Y-%m-%d").to_string())
            .unwrap_or_else(|| block.date.to_string());
        out.push_str(&format!(
            "{}. {} ({}) by {}\n",
            idx + 1,
            display_hash,
            date,
            block.author,
        ));
        if let Some(distance) = block.distance {
            out.push_str(&format!(" Similarity: {:.3}\n", 1.0 - distance));
        }
        // Subject = first line of the commit message.
        let subject = block.message.lines().next().unwrap_or(&block.message);
        if detail_level == "full" {
            out.push_str(&format!(" Message: {}\n", block.message));
        } else {
            out.push_str(&format!(" Message: {}\n", subject));
        }
        if detail_level != "signatures" {
            // `files` is stored as a JSON array; treat parse failures as empty.
            let files: Vec<String> = serde_json::from_str(&block.files).unwrap_or_default();
            if !files.is_empty() {
                out.push_str(&format!(" Files: {}\n", files.join(", ")));
            }
            if !block.description.is_empty() {
                out.push_str(&format!(" Description: {}\n", block.description));
            }
        }
        out.push('\n');
    }
    out
}
/// Combines documentation, code, and text results into a single report,
/// rendered in that order with empty sections skipped.
pub fn format_combined_search_results_as_text(
    code_blocks: &[CodeBlock],
    text_blocks: &[crate::store::TextBlock],
    doc_blocks: &[crate::store::DocumentBlock],
    detail_level: &str,
) -> String {
    let total = code_blocks.len() + text_blocks.len() + doc_blocks.len();
    if total == 0 {
        return "No results found.".to_string();
    }
    let mut out = format!("SEARCH RESULTS ({} total)\n\n", total);
    if !doc_blocks.is_empty() {
        out.push_str(&format_doc_search_results_as_text(doc_blocks, detail_level));
        out.push('\n');
    }
    if !code_blocks.is_empty() {
        out.push_str(&format_code_search_results_as_text(
            code_blocks,
            detail_level,
        ));
        out.push('\n');
    }
    if !text_blocks.is_empty() {
        out.push_str(&format_text_search_results_as_text(
            text_blocks,
            detail_level,
        ));
    }
    out
}
/// Detects whether the current working directory belongs to a branch with
/// a local index. Returns the branch's store and manifest only when every
/// step (branch detection, manifest load, store open) succeeds; any
/// failure yields `None` so callers fall back to the main index.
pub async fn detect_branch_search_context() -> Option<BranchSearchContext> {
    let current_dir = std::env::current_dir().ok()?;
    // Restored `&current_dir` on the next two calls — the previous text had
    // been corrupted by HTML-entity decoding ("&curren" -> "¤") and did not
    // compile.
    let branch_name = crate::indexer::branch::detect_branch_context(&current_dir)?;
    let branch_dir = crate::storage::get_branch_dir(&current_dir, &branch_name).ok()?;
    // load_manifest returns Result<Option<_>>; `??` unwraps both layers.
    let manifest = crate::indexer::branch::load_manifest(&branch_dir).ok()??;
    let branch_store = Store::new_for_branch(&branch_name).await.ok()?;
    Some(BranchSearchContext {
        store: branch_store,
        manifest,
    })
}
/// Runs a single-query semantic search and renders the results as text.
///
/// `mode` selects which indexes are queried ("all", "code", "docs",
/// "commits", or "text"); `detail_level` controls how verbosely each hit
/// is formatted. When a branch-local index is detected, branch results are
/// fetched as well and merged over the main-index results. When the
/// reranker is enabled, a larger candidate pool is fetched without a
/// distance filter, reranked, and only then filtered by
/// `similarity_threshold`; otherwise results are distance-filtered up
/// front and truncated to `max_results`.
///
/// # Errors
/// Returns an error for an unknown `mode`, or if the store, embedding
/// generation, a main-index query, or reranking fails. Branch-index query
/// failures are silently treated as empty results.
pub async fn search_codebase_with_details_text(
    query: &str,
    mode: &str,
    detail_level: &str,
    max_results: usize,
    similarity_threshold: f32,
    language_filter: Option<&str>,
    config: &Config,
) -> Result<String> {
    let store = Store::new().await?;
    let branch_ctx = detect_branch_search_context().await;
    let search_embeddings =
        crate::embedding::generate_search_embeddings(query, mode, config).await?;
    // With the reranker enabled, defer similarity filtering until after
    // reranking (the reranker rewrites distances).
    let distance_threshold = if config.search.reranker.enabled {
        None
    } else {
        Some(1.0 - similarity_threshold)
    };
    // Fetch a larger candidate pool when the reranker will narrow it later.
    let candidate_limit = if config.search.reranker.enabled {
        config.search.reranker.top_k_candidates
    } else {
        max_results
    };
    // Over-fetch from the main index when a branch index exists, since the
    // branch merge may displace some main-index hits.
    let main_limit = if branch_ctx.is_some() {
        candidate_limit * 2
    } else {
        candidate_limit
    };
    let (mut code_blocks, mut text_blocks, mut doc_blocks, mut commit_blocks) = match mode {
        "code" => {
            let embeddings = search_embeddings.code_embeddings.ok_or_else(|| {
                anyhow::anyhow!("No code embeddings generated for code search mode")
            })?;
            let mut results = store
                .get_code_blocks_with_language_filter(
                    embeddings.clone(),
                    Some(main_limit),
                    distance_threshold,
                    language_filter,
                )
                .await?;
            if let Some(ref ctx) = branch_ctx {
                // Branch-index failures degrade to "no branch results".
                let branch_results = ctx
                    .store
                    .get_code_blocks_with_language_filter(
                        embeddings,
                        Some(candidate_limit),
                        distance_threshold,
                        language_filter,
                    )
                    .await
                    .unwrap_or_default();
                let overridden = ctx.manifest.overridden_paths();
                results =
                    merge_branch_code_blocks(results, branch_results, &overridden, candidate_limit);
            }
            (results, vec![], vec![], vec![])
        }
        "text" => {
            let embeddings = search_embeddings.text_embeddings.ok_or_else(|| {
                anyhow::anyhow!("No text embeddings generated for text search mode")
            })?;
            let mut results = store
                .get_text_blocks_with_config(
                    embeddings.clone(),
                    Some(main_limit),
                    distance_threshold,
                )
                .await?;
            if let Some(ref ctx) = branch_ctx {
                let branch_results = ctx
                    .store
                    .get_text_blocks_with_config(
                        embeddings,
                        Some(candidate_limit),
                        distance_threshold,
                    )
                    .await
                    .unwrap_or_default();
                let overridden = ctx.manifest.overridden_paths();
                results =
                    merge_branch_text_blocks(results, branch_results, &overridden, candidate_limit);
            }
            (vec![], results, vec![], vec![])
        }
        "docs" => {
            let embeddings = search_embeddings.text_embeddings.ok_or_else(|| {
                anyhow::anyhow!("No text embeddings generated for docs search mode")
            })?;
            let mut results = store
                .get_document_blocks_with_config(
                    embeddings.clone(),
                    Some(main_limit),
                    distance_threshold,
                )
                .await?;
            if let Some(ref ctx) = branch_ctx {
                let branch_results = ctx
                    .store
                    .get_document_blocks_with_config(
                        embeddings,
                        Some(candidate_limit),
                        distance_threshold,
                    )
                    .await
                    .unwrap_or_default();
                let overridden = ctx.manifest.overridden_paths();
                results =
                    merge_branch_doc_blocks(results, branch_results, &overridden, candidate_limit);
            }
            (vec![], vec![], results, vec![])
        }
        // Commits are not branch-merged: only the main index is queried.
        "commits" => {
            let embeddings = search_embeddings.text_embeddings.ok_or_else(|| {
                anyhow::anyhow!("No text embeddings generated for commits search mode")
            })?;
            let results = store
                .get_commit_blocks_with_config(
                    embeddings,
                    Some(candidate_limit),
                    distance_threshold,
                )
                .await?;
            (vec![], vec![], vec![], results)
        }
        "all" => {
            let code_embeddings = search_embeddings.code_embeddings.ok_or_else(|| {
                anyhow::anyhow!("No code embeddings generated for all search mode")
            })?;
            let text_embeddings = search_embeddings.text_embeddings.ok_or_else(|| {
                anyhow::anyhow!("No text embeddings generated for all search mode")
            })?;
            // Split the candidate budget evenly across the three block types.
            let results_per_type = candidate_limit.div_ceil(3);
            let main_rpt = if branch_ctx.is_some() {
                results_per_type * 2
            } else {
                results_per_type
            };
            let mut code_results = store
                .get_code_blocks_with_language_filter(
                    code_embeddings.clone(),
                    Some(main_rpt),
                    distance_threshold,
                    language_filter,
                )
                .await?;
            let mut text_results = store
                .get_text_blocks_with_config(
                    text_embeddings.clone(),
                    Some(main_rpt),
                    distance_threshold,
                )
                .await?;
            let mut doc_results = store
                .get_document_blocks_with_config(
                    text_embeddings.clone(),
                    Some(main_rpt),
                    distance_threshold,
                )
                .await?;
            if let Some(ref ctx) = branch_ctx {
                let overridden = ctx.manifest.overridden_paths();
                let bc = ctx
                    .store
                    .get_code_blocks_with_language_filter(
                        code_embeddings,
                        Some(results_per_type),
                        distance_threshold,
                        language_filter,
                    )
                    .await
                    .unwrap_or_default();
                let bt = ctx
                    .store
                    .get_text_blocks_with_config(
                        text_embeddings.clone(),
                        Some(results_per_type),
                        distance_threshold,
                    )
                    .await
                    .unwrap_or_default();
                let bd = ctx
                    .store
                    .get_document_blocks_with_config(
                        text_embeddings,
                        Some(results_per_type),
                        distance_threshold,
                    )
                    .await
                    .unwrap_or_default();
                code_results =
                    merge_branch_code_blocks(code_results, bc, &overridden, results_per_type);
                text_results =
                    merge_branch_text_blocks(text_results, bt, &overridden, results_per_type);
                doc_results =
                    merge_branch_doc_blocks(doc_results, bd, &overridden, results_per_type);
            }
            (code_results, text_results, doc_results, vec![])
        }
        _ => {
            return Err(anyhow::anyhow!(
                "Invalid search mode '{}'. Use 'all', 'code', 'docs', 'commits', or 'text'.",
                mode
            ))
        }
    };
    if config.search.reranker.enabled {
        // Rerank every result set, then apply the similarity threshold that
        // was skipped during retrieval.
        code_blocks = crate::reranker::rerank_code_blocks_with_octolib(
            query,
            code_blocks,
            &config.search.reranker,
        )
        .await?;
        text_blocks = crate::reranker::rerank_text_blocks_with_octolib(
            query,
            text_blocks,
            &config.search.reranker,
        )
        .await?;
        doc_blocks = crate::reranker::rerank_doc_blocks_with_octolib(
            query,
            doc_blocks,
            &config.search.reranker,
        )
        .await?;
        commit_blocks = crate::reranker::rerank_commit_blocks_with_octolib(
            query,
            commit_blocks,
            &config.search.reranker,
        )
        .await?;
        let dist_thresh = 1.0 - similarity_threshold;
        // Blocks without a distance are kept.
        code_blocks.retain(|b| b.distance.is_none_or(|d| d <= dist_thresh));
        text_blocks.retain(|b| b.distance.is_none_or(|d| d <= dist_thresh));
        doc_blocks.retain(|b| b.distance.is_none_or(|d| d <= dist_thresh));
        commit_blocks.retain(|b| b.distance.is_none_or(|d| d <= dist_thresh));
    } else {
        code_blocks.truncate(max_results);
        text_blocks.truncate(max_results);
        doc_blocks.truncate(max_results);
        commit_blocks.truncate(max_results);
    }
    match mode {
        "code" => Ok(format_code_search_results_as_text(
            &code_blocks,
            detail_level,
        )),
        "text" => Ok(format_text_search_results_as_text(
            &text_blocks,
            detail_level,
        )),
        "docs" => Ok(format_doc_search_results_as_text(&doc_blocks, detail_level)),
        "commits" => Ok(format_commit_search_results_as_text(
            &commit_blocks,
            detail_level,
        )),
        "all" => Ok(format_combined_search_results_as_text(
            &code_blocks,
            &text_blocks,
            &doc_blocks,
            detail_level,
        )),
        _ => Err(anyhow::anyhow!(
            "Invalid search mode '{}'. Use 'all', 'code', 'docs', 'commits', or 'text'.",
            mode
        )),
    }
}
/// Runs a multi-query semantic search (all queries searched in parallel,
/// results deduplicated and merged) and renders the results as text.
///
/// Behaves like [`search_codebase_with_details_text`] but accepts up to
/// `octolib::embedding::constants::MAX_QUERIES` queries; blocks matched by
/// several queries are boosted during deduplication. When the reranker is
/// enabled the queries are joined into one string for reranking.
///
/// # Errors
/// Returns an error when `queries` is empty or exceeds the query limit,
/// for an unknown `mode`, or if the store, embedding generation, a search,
/// or reranking fails.
pub async fn search_codebase_with_details_multi_query_text(
    queries: &[String],
    mode: &str,
    detail_level: &str,
    max_results: usize,
    similarity_threshold: f32,
    language_filter: Option<&str>,
    config: &Config,
) -> Result<String> {
    // Validate inputs before any expensive setup (store open / branch
    // detection).
    if queries.is_empty() {
        return Err(anyhow::anyhow!("At least one query is required"));
    }
    if queries.len() > octolib::embedding::constants::MAX_QUERIES {
        return Err(anyhow::anyhow!(
            "Maximum {} queries allowed, got {}. Use fewer, more specific terms.",
            // Report the same limit that is enforced above (previously this
            // interpolated crate::constants::MAX_QUERIES, a different
            // constant from the one being checked).
            octolib::embedding::constants::MAX_QUERIES,
            queries.len()
        ));
    }
    let store = Store::new().await?;
    let branch_ctx = detect_branch_search_context().await;
    let embeddings = generate_batch_embeddings_for_queries(queries, mode, config).await?;
    // Pair each query string with its embeddings, preserving order.
    let query_embeddings: Vec<_> = queries
        .iter()
        .cloned()
        .zip(embeddings.into_iter())
        .collect();
    // With the reranker enabled, defer similarity filtering until after
    // reranking.
    let dedup_distance_threshold = if config.search.reranker.enabled {
        None
    } else {
        Some(1.0 - similarity_threshold)
    };
    let search_results = execute_parallel_searches(
        &store,
        query_embeddings,
        &SearchParams {
            mode,
            max_results,
            similarity_threshold,
            language_filter,
            config,
            branch_ctx: branch_ctx.as_ref(),
        },
    )
    .await?;
    let (mut code_blocks, mut doc_blocks, mut text_blocks, mut commit_blocks) =
        deduplicate_and_merge_results(search_results, queries, dedup_distance_threshold);
    // `queries` is known non-empty here thanks to the early validation.
    if config.search.reranker.enabled {
        let query = queries.join(" ");
        code_blocks = crate::reranker::rerank_code_blocks_with_octolib(
            &query,
            code_blocks,
            &config.search.reranker,
        )
        .await?;
        doc_blocks = crate::reranker::rerank_doc_blocks_with_octolib(
            &query,
            doc_blocks,
            &config.search.reranker,
        )
        .await?;
        text_blocks = crate::reranker::rerank_text_blocks_with_octolib(
            &query,
            text_blocks,
            &config.search.reranker,
        )
        .await?;
        commit_blocks = crate::reranker::rerank_commit_blocks_with_octolib(
            &query,
            commit_blocks,
            &config.search.reranker,
        )
        .await?;
        // Apply the similarity threshold that was skipped during retrieval;
        // blocks without a distance are kept.
        let dist_thresh = 1.0 - similarity_threshold;
        code_blocks.retain(|b| b.distance.is_none_or(|d| d <= dist_thresh));
        doc_blocks.retain(|b| b.distance.is_none_or(|d| d <= dist_thresh));
        text_blocks.retain(|b| b.distance.is_none_or(|d| d <= dist_thresh));
        commit_blocks.retain(|b| b.distance.is_none_or(|d| d <= dist_thresh));
    } else {
        code_blocks.truncate(max_results);
        doc_blocks.truncate(max_results);
        text_blocks.truncate(max_results);
        commit_blocks.truncate(max_results);
    }
    match mode {
        "code" => Ok(format_code_search_results_as_text(
            &code_blocks,
            detail_level,
        )),
        "text" => Ok(format_text_search_results_as_text(
            &text_blocks,
            detail_level,
        )),
        "docs" => Ok(format_doc_search_results_as_text(&doc_blocks, detail_level)),
        "commits" => Ok(format_commit_search_results_as_text(
            &commit_blocks,
            detail_level,
        )),
        "all" => Ok(format_combined_search_results_as_text(
            &code_blocks,
            &text_blocks,
            &doc_blocks,
            detail_level,
        )),
        _ => Err(anyhow::anyhow!(
            "Invalid search mode '{}'. Use 'all', 'code', 'docs', 'commits', or 'text'.",
            mode
        )),
    }
}
/// Comment prefixes recognized by the code preview when choosing where a
/// preview should start.
const CODE_COMMENT_PREFIXES: [&str; 10] = [
    "//", "#", "/*", "*", "<!--", "--", "%", ";", "\"\"\"", "'''",
];

/// Builds a line-numbered preview of `content` (shared core of the three
/// `get_*_preview_with_lines` helpers, which previously triplicated this
/// logic verbatim).
///
/// Content of at most 10 lines is returned in full. Longer content shows 4
/// lines starting at the first line that is neither blank nor matched by
/// `skip_line` (index 0 if every line is skipped), followed by either an
/// elision marker plus the final 3 lines, or the remaining tail when it is
/// 3 lines or fewer. Line `i` of the content is numbered `start_line + i`.
fn preview_with_lines(
    content: &str,
    start_line: usize,
    skip_line: impl Fn(&str) -> bool,
) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if lines.len() <= 10 {
        return lines
            .iter()
            .enumerate()
            .map(|(i, line)| format!("{}: {}", start_line + i, line))
            .collect::<Vec<_>>()
            .join("\n");
    }
    // First "interesting" line; stays 0 when nothing qualifies.
    let mut start_idx = 0;
    for (i, line) in lines.iter().enumerate() {
        let trimmed = line.trim();
        if trimmed.is_empty() || skip_line(trimmed) {
            continue;
        }
        start_idx = i;
        break;
    }
    let preview_start = 4;
    let preview_end = 3;
    let mut result = Vec::new();
    for (i, line) in lines.iter().skip(start_idx).take(preview_start).enumerate() {
        result.push(format!("{}: {}", start_line + start_idx + i, line));
    }
    if start_idx + preview_start < lines.len() {
        let remaining_lines = lines.len() - (start_idx + preview_start);
        if remaining_lines > preview_end {
            result.push(format!("... ({} more lines)", remaining_lines));
            let end_start_idx = lines.len() - preview_end;
            for (i, line) in lines.iter().skip(end_start_idx).enumerate() {
                result.push(format!("{}: {}", start_line + end_start_idx + i, line));
            }
        } else {
            // Short tail: just print the rest without an elision marker.
            for (i, line) in lines.iter().skip(start_idx + preview_start).enumerate() {
                result.push(format!(
                    "{}: {}",
                    start_line + start_idx + preview_start + i,
                    line
                ));
            }
        }
    }
    result.join("\n")
}

/// Line-numbered preview for plain-text content; only leading blank lines
/// are skipped when locating the preview start.
fn get_text_preview_with_lines(content: &str, start_line: usize) -> String {
    preview_with_lines(content, start_line, |_| false)
}

/// Line-numbered preview for documentation content; only leading blank
/// lines are skipped when locating the preview start.
fn get_doc_preview_with_lines(content: &str, start_line: usize) -> String {
    preview_with_lines(content, start_line, |_| false)
}

/// Line-numbered preview for code content; leading comment lines (in any
/// of the recognized syntaxes) are skipped in addition to blank lines when
/// locating the preview start. `_language` is accepted for signature
/// stability but unused.
fn get_code_preview_with_lines(content: &str, start_line: usize, _language: &str) -> String {
    preview_with_lines(content, start_line, |trimmed| {
        CODE_COMMENT_PREFIXES.iter().any(|p| trimmed.starts_with(p))
    })
}
/// Search results produced for a single query in a multi-query search,
/// tagged with the originating query's index so per-query coverage can be
/// tracked when results are deduplicated and merged.
#[derive(Debug, Clone)]
pub struct QuerySearchResult {
    /// Index of the originating query within the caller's query list.
    pub query_index: usize,
    pub code_blocks: Vec<crate::store::CodeBlock>,
    pub doc_blocks: Vec<crate::store::DocumentBlock>,
    pub text_blocks: Vec<crate::store::TextBlock>,
    pub commit_blocks: Vec<crate::store::CommitBlock>,
}
/// Generates one [`crate::embedding::SearchModeEmbeddings`] per query,
/// batching the embedding calls per model.
///
/// "code" produces only code-model embeddings; "docs"/"text"/"commits"
/// produce only text-model embeddings; "all" produces both (reusing a
/// single batch for both roles when the configured code and text models
/// are identical, otherwise running the two batches concurrently).
///
/// # Errors
/// Returns an error for an unknown `mode` or if embedding generation fails.
pub async fn generate_batch_embeddings_for_queries(
    queries: &[String],
    mode: &str,
    config: &Config,
) -> Result<Vec<crate::embedding::SearchModeEmbeddings>> {
    match mode {
        "code" => {
            // Second argument selects the code model (true) vs text model.
            let code_embeddings = crate::embedding::generate_embeddings_batch(
                queries.to_vec(),
                true,
                config,
                crate::embedding::types::InputType::Query,
            )
            .await?;
            Ok(code_embeddings
                .into_iter()
                .map(|emb| crate::embedding::SearchModeEmbeddings {
                    code_embeddings: Some(emb),
                    text_embeddings: None,
                })
                .collect())
        }
        "docs" | "text" | "commits" => {
            let text_embeddings = crate::embedding::generate_embeddings_batch(
                queries.to_vec(),
                false,
                config,
                crate::embedding::types::InputType::Query,
            )
            .await?;
            Ok(text_embeddings
                .into_iter()
                .map(|emb| crate::embedding::SearchModeEmbeddings {
                    code_embeddings: None,
                    text_embeddings: Some(emb),
                })
                .collect())
        }
        "all" => {
            let code_model = &config.embedding.code_model;
            let text_model = &config.embedding.text_model;
            if code_model == text_model {
                // Same model for both roles: embed once and share the result.
                let embeddings = crate::embedding::generate_embeddings_batch(
                    queries.to_vec(),
                    true,
                    config,
                    crate::embedding::types::InputType::Query,
                )
                .await?;
                Ok(embeddings
                    .into_iter()
                    .map(|emb| crate::embedding::SearchModeEmbeddings {
                        code_embeddings: Some(emb.clone()),
                        text_embeddings: Some(emb),
                    })
                    .collect())
            } else {
                // Different models: run both batches concurrently.
                let (code_embeddings, text_embeddings) = tokio::try_join!(
                    crate::embedding::generate_embeddings_batch(
                        queries.to_vec(),
                        true,
                        config,
                        crate::embedding::types::InputType::Query
                    ),
                    crate::embedding::generate_embeddings_batch(
                        queries.to_vec(),
                        false,
                        config,
                        crate::embedding::types::InputType::Query
                    )
                )?;
                Ok(code_embeddings
                    .into_iter()
                    .zip(text_embeddings.into_iter())
                    .map(
                        |(code_emb, text_emb)| crate::embedding::SearchModeEmbeddings {
                            code_embeddings: Some(code_emb),
                            text_embeddings: Some(text_emb),
                        },
                    )
                    .collect())
            }
        }
        _ => Err(anyhow::anyhow!("Invalid search mode: {}", mode)),
    }
}
/// Runs one search against `store` for a single query's embeddings and
/// returns all result collections tagged with `query_index`.
///
/// Only the collections relevant to `mode` are populated; the others stay
/// empty. For "all", the per-query limit is split evenly across the three
/// block types, and the text/document lookups run concurrently. A mode
/// whose required embeddings are absent yields empty results rather than
/// an error.
///
/// # Errors
/// Returns an error for an unknown `mode` or if a store query fails.
pub async fn execute_single_search_with_embeddings(
    store: &Store,
    embeddings: crate::embedding::SearchModeEmbeddings,
    mode: &str,
    per_query_limit: usize,
    query_index: usize,
    distance_threshold: Option<f32>,
    language_filter: Option<&str>,
) -> Result<QuerySearchResult> {
    let mut code_blocks = Vec::new();
    let mut doc_blocks = Vec::new();
    let mut text_blocks = Vec::new();
    let mut commit_blocks = Vec::new();
    match mode {
        "code" => {
            if let Some(code_emb) = embeddings.code_embeddings {
                code_blocks = store
                    .get_code_blocks_with_language_filter(
                        code_emb,
                        Some(per_query_limit),
                        distance_threshold,
                        language_filter,
                    )
                    .await?;
            }
        }
        "docs" => {
            if let Some(text_emb) = embeddings.text_embeddings {
                doc_blocks = store
                    .get_document_blocks_with_config(
                        text_emb,
                        Some(per_query_limit),
                        distance_threshold,
                    )
                    .await?;
            }
        }
        "text" => {
            if let Some(text_emb) = embeddings.text_embeddings {
                text_blocks = store
                    .get_text_blocks_with_config(
                        text_emb,
                        Some(per_query_limit),
                        distance_threshold,
                    )
                    .await?;
            }
        }
        "commits" => {
            if let Some(text_emb) = embeddings.text_embeddings {
                commit_blocks = store
                    .get_commit_blocks_with_config(
                        text_emb,
                        Some(per_query_limit),
                        distance_threshold,
                    )
                    .await?;
            }
        }
        "all" => {
            // Split the per-query budget evenly across code/text/docs.
            let results_per_type = per_query_limit.div_ceil(3);
            if let Some(code_emb) = embeddings.code_embeddings {
                code_blocks = store
                    .get_code_blocks_with_language_filter(
                        code_emb,
                        Some(results_per_type),
                        distance_threshold,
                        language_filter,
                    )
                    .await?;
            }
            if let Some(text_emb) = embeddings.text_embeddings {
                // Text and document lookups share the embedding and run
                // concurrently.
                let text_emb_clone = text_emb.clone();
                let (text_result, doc_result) = tokio::try_join!(
                    store.get_text_blocks_with_config(
                        text_emb,
                        Some(results_per_type),
                        distance_threshold,
                    ),
                    store.get_document_blocks_with_config(
                        text_emb_clone,
                        Some(results_per_type),
                        distance_threshold,
                    )
                )?;
                text_blocks = text_result;
                doc_blocks = doc_result;
            }
        }
        _ => return Err(anyhow::anyhow!("Invalid search mode: {}", mode)),
    }
    Ok(QuerySearchResult {
        query_index,
        code_blocks,
        doc_blocks,
        text_blocks,
        commit_blocks,
    })
}
/// Runs the searches for every query concurrently against the main store
/// and, when a branch context is present, against the branch store as
/// well, merging each query's branch results over its main results.
///
/// The per-query candidate limit is the reranker's `top_k_candidates` when
/// reranking is enabled, otherwise `max_results * 2` spread across the
/// queries. The main index is queried with twice that limit when a branch
/// exists, since the merge may displace main-index hits.
///
/// # Errors
/// Returns an error if any individual search fails (main or branch).
pub async fn execute_parallel_searches(
    store: &Store,
    query_embeddings: Vec<(String, crate::embedding::SearchModeEmbeddings)>,
    params: &SearchParams<'_>,
) -> Result<Vec<QuerySearchResult>> {
    let per_query_limit = if params.config.search.reranker.enabled {
        params.config.search.reranker.top_k_candidates
    } else {
        // .max(1) guards against division by zero on an empty query list.
        (params.max_results * 2) / query_embeddings.len().max(1)
    };
    let main_limit = if params.branch_ctx.is_some() {
        per_query_limit * 2
    } else {
        per_query_limit
    };
    // With the reranker enabled, similarity filtering happens post-rerank.
    let distance_threshold = if params.config.search.reranker.enabled {
        None
    } else {
        Some(1.0 - params.similarity_threshold)
    };
    // One future per query against the main store; the enumerate index
    // becomes the result's query_index.
    let main_futures: Vec<_> = query_embeddings
        .iter()
        .enumerate()
        .map(|(index, (_, embeddings))| {
            let emb = embeddings.clone();
            async move {
                execute_single_search_with_embeddings(
                    store,
                    emb,
                    params.mode,
                    main_limit,
                    index,
                    distance_threshold,
                    params.language_filter,
                )
                .await
            }
        })
        .collect();
    let mut main_results = futures::future::try_join_all(main_futures).await?;
    // No branch index: the main results are final.
    let Some(branch) = params.branch_ctx else {
        return Ok(main_results);
    };
    // Same set of searches against the branch store, with the unscaled limit.
    let branch_futures: Vec<_> = query_embeddings
        .iter()
        .enumerate()
        .map(|(index, (_, embeddings))| {
            let emb = embeddings.clone();
            async move {
                execute_single_search_with_embeddings(
                    &branch.store,
                    emb,
                    params.mode,
                    per_query_limit,
                    index,
                    distance_threshold,
                    params.language_filter,
                )
                .await
            }
        })
        .collect();
    let branch_results = futures::future::try_join_all(branch_futures).await?;
    // Both result vectors are index-aligned by construction, so a pairwise
    // zip merges each query's main and branch results.
    let merged: Vec<QuerySearchResult> = main_results
        .drain(..)
        .zip(branch_results)
        .map(|(main, branch_r)| {
            merge_branch_query_results(main, branch_r, &branch.manifest, per_query_limit)
        })
        .collect();
    Ok(merged)
}
/// Rewards a code block matched by more than one query: its distance is
/// scaled down in proportion to query coverage (scale factor
/// `1 - min(coverage * 0.1, 0.2)`). No-op for single-query searches,
/// single-query matches, or blocks without a distance.
pub fn apply_multi_query_bonus_code(
    block: &mut crate::store::CodeBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    if query_indices.len() <= 1 || total_queries <= 1 {
        return;
    }
    let coverage_ratio = query_indices.len() as f32 / total_queries as f32;
    let bonus_factor = 1.0 - (coverage_ratio * 0.1).min(0.2);
    block.distance = block.distance.map(|d| d * bonus_factor);
}
/// Rewards a document block matched by more than one of the expanded queries
/// by shrinking its distance (smaller distance ranks higher).
///
/// Same scheme as the other `apply_multi_query_bonus_*` helpers:
/// `distance *= 1.0 - min(coverage * 0.1, 0.2)`.
pub fn apply_multi_query_bonus_doc(
    block: &mut crate::store::DocumentBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    // A bonus only makes sense when several queries agreed on this block.
    if query_indices.len() <= 1 || total_queries <= 1 {
        return;
    }
    let coverage = query_indices.len() as f32 / total_queries as f32;
    // Cap the discount at 20% of the distance.
    let factor = 1.0 - f32::min(coverage * 0.1, 0.2);
    block.distance = block.distance.map(|d| d * factor);
}
/// Rewards a text block matched by more than one of the expanded queries by
/// shrinking its distance (smaller distance ranks higher).
///
/// Same scheme as the other `apply_multi_query_bonus_*` helpers:
/// `distance *= 1.0 - min(coverage * 0.1, 0.2)`.
pub fn apply_multi_query_bonus_text(
    block: &mut crate::store::TextBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    // A bonus only makes sense when several queries agreed on this block.
    if query_indices.len() <= 1 || total_queries <= 1 {
        return;
    }
    let coverage = query_indices.len() as f32 / total_queries as f32;
    // Cap the discount at 20% of the distance.
    let factor = 1.0 - f32::min(coverage * 0.1, 0.2);
    block.distance = block.distance.map(|d| d * factor);
}
/// Rewards a commit block matched by more than one of the expanded queries
/// by shrinking its distance (smaller distance ranks higher).
///
/// Same scheme as the other `apply_multi_query_bonus_*` helpers:
/// `distance *= 1.0 - min(coverage * 0.1, 0.2)`.
pub fn apply_multi_query_bonus_commit(
    block: &mut crate::store::CommitBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    // A bonus only makes sense when several queries agreed on this block.
    if query_indices.len() <= 1 || total_queries <= 1 {
        return;
    }
    let coverage = query_indices.len() as f32 / total_queries as f32;
    // Cap the discount at 20% of the distance.
    let factor = 1.0 - f32::min(coverage * 0.1, 0.2);
    block.distance = block.distance.map(|d| d * factor);
}
/// Merges per-query search results into four deduplicated, ranked lists
/// (code, document, text, and commit blocks).
///
/// Blocks are deduplicated by content hash across queries: the occurrence
/// with the smallest distance is kept, and every query index that matched a
/// hash is accumulated so `apply_multi_query_bonus_*` can discount blocks
/// that several queries agreed on. Blocks whose distance exceeds
/// `distance_threshold` (when given) are dropped, and each list is returned
/// sorted by ascending distance with unscored blocks last.
pub fn deduplicate_and_merge_results(
    search_results: Vec<QuerySearchResult>,
    queries: &[String],
    distance_threshold: Option<f32>,
) -> (
    Vec<crate::store::CodeBlock>,
    Vec<crate::store::DocumentBlock>,
    Vec<crate::store::TextBlock>,
    Vec<crate::store::CommitBlock>,
) {
    use std::cmp::Ordering;
    use std::collections::hash_map::Entry;
    use std::collections::HashMap;

    // Shared comparator: ascending by distance, unscored blocks sort last.
    fn cmp_distance(a: Option<f32>, b: Option<f32>) -> Ordering {
        match (a, b) {
            (Some(da), Some(db)) => da.partial_cmp(&db).unwrap_or(Ordering::Equal),
            (Some(_), None) => Ordering::Less,
            (None, Some(_)) => Ordering::Greater,
            (None, None) => Ordering::Equal,
        }
    }

    // Shared threshold filter; a block without a distance always passes.
    let passes = |d: Option<f32>| match distance_threshold {
        Some(thresh) => d.is_none_or(|dist| dist <= thresh),
        None => true,
    };

    // --- Code blocks: dedup by hash, tracking matching query indices. ---
    let mut code_map: HashMap<String, (crate::store::CodeBlock, Vec<usize>)> = HashMap::new();
    for result in &search_results {
        for block in &result.code_blocks {
            match code_map.entry(block.hash.clone()) {
                Entry::Vacant(e) => {
                    e.insert((block.clone(), vec![result.query_index]));
                }
                Entry::Occupied(mut e) => {
                    let (existing_block, query_indices) = e.get_mut();
                    query_indices.push(result.query_index);
                    // Keep the closer occurrence. (`None < Some(_)` under
                    // `Option` ordering, so an unscored duplicate also wins —
                    // NOTE(review): confirm that is intended.)
                    // BUGFIX: the replacement previously shifted the kept
                    // block's start/end lines by +1, corrupting reported
                    // positions; the block is now kept as-is.
                    if block.distance < existing_block.distance {
                        *existing_block = block.clone();
                    }
                }
            }
        }
    }

    // --- Document blocks: same dedup scheme. ---
    let mut doc_map: HashMap<String, (crate::store::DocumentBlock, Vec<usize>)> = HashMap::new();
    for result in &search_results {
        for block in &result.doc_blocks {
            match doc_map.entry(block.hash.clone()) {
                Entry::Vacant(e) => {
                    e.insert((block.clone(), vec![result.query_index]));
                }
                Entry::Occupied(mut e) => {
                    let (existing_block, query_indices) = e.get_mut();
                    query_indices.push(result.query_index);
                    // BUGFIX: see code-block path — no more +1 line shift.
                    if block.distance < existing_block.distance {
                        *existing_block = block.clone();
                    }
                }
            }
        }
    }

    // --- Text blocks: same dedup scheme. ---
    let mut text_map: HashMap<String, (crate::store::TextBlock, Vec<usize>)> = HashMap::new();
    for result in &search_results {
        for block in &result.text_blocks {
            match text_map.entry(block.hash.clone()) {
                Entry::Vacant(e) => {
                    e.insert((block.clone(), vec![result.query_index]));
                }
                Entry::Occupied(mut e) => {
                    let (existing_block, query_indices) = e.get_mut();
                    query_indices.push(result.query_index);
                    // BUGFIX: see code-block path — no more +1 line shift.
                    if block.distance < existing_block.distance {
                        *existing_block = block.clone();
                    }
                }
            }
        }
    }

    // Apply the multi-query bonus, then drop blocks over the threshold.
    let mut final_code_blocks: Vec<crate::store::CodeBlock> = code_map
        .into_values()
        .map(|(mut block, query_indices)| {
            apply_multi_query_bonus_code(&mut block, &query_indices, queries.len());
            block
        })
        .filter(|block| passes(block.distance))
        .collect();
    let mut final_doc_blocks: Vec<crate::store::DocumentBlock> = doc_map
        .into_values()
        .map(|(mut block, query_indices)| {
            apply_multi_query_bonus_doc(&mut block, &query_indices, queries.len());
            block
        })
        .filter(|block| passes(block.distance))
        .collect();
    let mut final_text_blocks: Vec<crate::store::TextBlock> = text_map
        .into_values()
        .map(|(mut block, query_indices)| {
            apply_multi_query_bonus_text(&mut block, &query_indices, queries.len());
            block
        })
        .filter(|block| passes(block.distance))
        .collect();
    final_code_blocks.sort_by(|a, b| cmp_distance(a.distance, b.distance));
    final_doc_blocks.sort_by(|a, b| cmp_distance(a.distance, b.distance));
    final_text_blocks.sort_by(|a, b| cmp_distance(a.distance, b.distance));

    // --- Commit blocks: same dedup scheme; commits carry no line numbers,
    // so there is no positional bookkeeping here. ---
    let mut commit_map: HashMap<String, (crate::store::CommitBlock, Vec<usize>)> = HashMap::new();
    for result in &search_results {
        for block in &result.commit_blocks {
            match commit_map.entry(block.hash.clone()) {
                Entry::Vacant(e) => {
                    e.insert((block.clone(), vec![result.query_index]));
                }
                Entry::Occupied(mut e) => {
                    let (existing_block, query_indices) = e.get_mut();
                    query_indices.push(result.query_index);
                    if block.distance < existing_block.distance {
                        *existing_block = block.clone();
                    }
                }
            }
        }
    }
    let mut final_commit_blocks: Vec<crate::store::CommitBlock> = commit_map
        .into_values()
        .map(|(mut block, query_indices)| {
            apply_multi_query_bonus_commit(&mut block, &query_indices, queries.len());
            block
        })
        .filter(|block| passes(block.distance))
        .collect();
    final_commit_blocks.sort_by(|a, b| cmp_distance(a.distance, b.distance));

    (
        final_code_blocks,
        final_doc_blocks,
        final_text_blocks,
        final_commit_blocks,
    )
}
/// Combines branch-store and main-index code results for one query: main
/// hits on branch-overridden paths are dropped, the remainder is ranked by
/// ascending distance, and the list is capped at `limit`.
fn merge_branch_code_blocks(
    main: Vec<CodeBlock>,
    branch: Vec<CodeBlock>,
    overridden: &HashSet<&str>,
    limit: usize,
) -> Vec<CodeBlock> {
    // Branch results stand in for any overridden (changed/deleted) paths.
    let mut merged = branch;
    merged.extend(
        main.into_iter()
            .filter(|block| !overridden.contains(block.path.as_str())),
    );
    // `None` distances sort first under `Option`'s ordering.
    merged.sort_by(|x, y| {
        x.distance
            .partial_cmp(&y.distance)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    merged.truncate(limit);
    merged
}
/// Combines branch-store and main-index text results for one query: main
/// hits on branch-overridden paths are dropped, the remainder is ranked by
/// ascending distance, and the list is capped at `limit`.
fn merge_branch_text_blocks(
    main: Vec<TextBlock>,
    branch: Vec<TextBlock>,
    overridden: &HashSet<&str>,
    limit: usize,
) -> Vec<TextBlock> {
    // Branch results stand in for any overridden (changed/deleted) paths.
    let mut merged = branch;
    merged.extend(
        main.into_iter()
            .filter(|block| !overridden.contains(block.path.as_str())),
    );
    // `None` distances sort first under `Option`'s ordering.
    merged.sort_by(|x, y| {
        x.distance
            .partial_cmp(&y.distance)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    merged.truncate(limit);
    merged
}
/// Combines branch-store and main-index document results for one query:
/// main hits on branch-overridden paths are dropped, the remainder is
/// ranked by ascending distance, and the list is capped at `limit`.
fn merge_branch_doc_blocks(
    main: Vec<DocumentBlock>,
    branch: Vec<DocumentBlock>,
    overridden: &HashSet<&str>,
    limit: usize,
) -> Vec<DocumentBlock> {
    // Branch results stand in for any overridden (changed/deleted) paths.
    let mut merged = branch;
    merged.extend(
        main.into_iter()
            .filter(|block| !overridden.contains(block.path.as_str())),
    );
    // `None` distances sort first under `Option`'s ordering.
    merged.sort_by(|x, y| {
        x.distance
            .partial_cmp(&y.distance)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    merged.truncate(limit);
    merged
}
/// Merges the main-index and branch-store results for a single query into
/// one `QuerySearchResult`, letting branch blocks shadow main-index blocks
/// on paths the branch manifest marks as overridden.
fn merge_branch_query_results(
    main: QuerySearchResult,
    branch: QuerySearchResult,
    manifest: &BranchManifest,
    per_query_limit: usize,
) -> QuerySearchResult {
    // Paths the branch changed or removed must not surface from the main index.
    let overridden = manifest.overridden_paths();
    let code_blocks = merge_branch_code_blocks(
        main.code_blocks,
        branch.code_blocks,
        &overridden,
        per_query_limit,
    );
    let doc_blocks = merge_branch_doc_blocks(
        main.doc_blocks,
        branch.doc_blocks,
        &overridden,
        per_query_limit,
    );
    let text_blocks = merge_branch_text_blocks(
        main.text_blocks,
        branch.text_blocks,
        &overridden,
        per_query_limit,
    );
    QuerySearchResult {
        query_index: main.query_index,
        code_blocks,
        doc_blocks,
        text_blocks,
        // Commit results pass through from the main index unchanged; the
        // branch result's commit blocks are ignored.
        commit_blocks: main.commit_blocks,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // An empty symbol list renders as an empty string.
    #[test]
    fn test_format_symbols_for_display_empty() {
        let symbols: Vec<String> = vec![];
        let result = format_symbols_for_display(&symbols);
        assert_eq!(result, "");
    }

    // Any symbol containing '_' is filtered out of the display list.
    // NOTE(review): this also drops legitimate snake_case identifiers such
    // as "my_function", not just node-type-style names — confirm intended.
    #[test]
    fn test_format_symbols_for_display_filters_types() {
        let symbols = vec![
            "function_definition".to_string(),
            "my_function".to_string(),
            "class_declaration".to_string(),
            "MyClass".to_string(),
        ];
        let result = format_symbols_for_display(&symbols);
        assert_eq!(result, "MyClass");
    }

    // Duplicates collapse to a single entry; output is sorted and
    // comma-joined.
    #[test]
    fn test_format_symbols_for_display_deduplicates() {
        let symbols = vec![
            "foo".to_string(),
            "bar".to_string(),
            "foo".to_string(),
            "baz".to_string(),
        ];
        let result = format_symbols_for_display(&symbols);
        assert_eq!(result, "bar, baz, foo");
    }

    // Output is alphabetically sorted regardless of input order.
    #[test]
    fn test_format_symbols_for_display_sorts() {
        let symbols = vec![
            "zebra".to_string(),
            "apple".to_string(),
            "mango".to_string(),
        ];
        let result = format_symbols_for_display(&symbols);
        assert_eq!(result, "apple, mango, zebra");
    }

    // Combined behavior: underscore-bearing names are dropped, survivors
    // are deduplicated and sorted.
    #[test]
    fn test_format_symbols_for_display_mixed() {
        let symbols = vec![
            "my_func".to_string(),
            "type_alias".to_string(),
            "AnotherFunc".to_string(),
            "my_func".to_string(),
            "interface_def".to_string(),
            "SimpleFunc".to_string(),
        ];
        let result = format_symbols_for_display(&symbols);
        assert_eq!(result, "AnotherFunc, SimpleFunc");
    }

    // Formatting an empty commit list yields a fixed placeholder message.
    #[test]
    fn test_format_commit_search_results_empty() {
        let blocks: Vec<crate::store::CommitBlock> = vec![];
        let result = format_commit_search_results_as_text(&blocks, "partial");
        assert_eq!(result, "No commit results found.");
    }

    // Detail levels for commit formatting, per the assertions below:
    // "partial" shows the subject, files and description but not the body;
    // "full" adds the message body; "signatures" shows only the subject.
    #[test]
    fn test_format_commit_search_results() {
        let blocks = vec![crate::store::CommitBlock {
            hash: "abc12345deadbeef".to_string(),
            author: "Alice".to_string(),
            date: 1700000000,
            message: "feat: add retry logic\n\nDetailed body here".to_string(),
            content: "feat: add retry logic\n\nFiles: src/client.rs".to_string(),
            files: r#"["src/client.rs","src/retry.rs"]"#.to_string(),
            description: "Adds exponential backoff".to_string(),
            distance: Some(0.15),
        }];
        let result = format_commit_search_results_as_text(&blocks, "partial");
        assert!(result.contains("COMMIT RESULTS (1)"));
        // Only the short (8-char) hash prefix is asserted, not the full hash.
        assert!(result.contains("abc12345"));
        assert!(result.contains("Alice"));
        assert!(result.contains("feat: add retry logic"));
        assert!(!result.contains("Detailed body here"));
        assert!(result.contains("src/client.rs"));
        assert!(result.contains("Adds exponential backoff"));
        let result_full = format_commit_search_results_as_text(&blocks, "full");
        assert!(result_full.contains("Detailed body here"));
        assert!(result_full.contains("src/client.rs"));
        let result_sig = format_commit_search_results_as_text(&blocks, "signatures");
        assert!(result_sig.contains("feat: add retry logic"));
        assert!(!result_sig.contains("Detailed body here"));
        assert!(!result_sig.contains("src/client.rs"));
    }

    // Single-query matches get no bonus; multi-query matches have their
    // distance reduced (smaller distance == better rank).
    #[test]
    fn test_apply_multi_query_bonus_commit() {
        let mut block = crate::store::CommitBlock {
            hash: "abc".to_string(),
            author: "A".to_string(),
            date: 0,
            message: "m".to_string(),
            content: "c".to_string(),
            files: "[]".to_string(),
            description: String::new(),
            distance: Some(0.5),
        };
        apply_multi_query_bonus_commit(&mut block, &[0], 3);
        assert_eq!(block.distance, Some(0.5));
        apply_multi_query_bonus_commit(&mut block, &[0, 1], 3);
        assert!(block.distance.unwrap() < 0.5);
    }

    // Test fixture: a minimal CodeBlock with the given path and distance.
    fn make_code_block(path: &str, distance: f32) -> CodeBlock {
        CodeBlock {
            path: path.to_string(),
            language: "rust".to_string(),
            content: format!("// {}", path),
            symbols: vec![],
            start_line: 1,
            end_line: 10,
            hash: format!("hash_{}", path),
            distance: Some(distance),
        }
    }

    // Test fixture: a minimal TextBlock with the given path and distance.
    fn make_text_block(path: &str, distance: f32) -> TextBlock {
        TextBlock {
            path: path.to_string(),
            language: "text".to_string(),
            content: format!("text {}", path),
            start_line: 1,
            end_line: 5,
            hash: format!("hash_{}", path),
            distance: Some(distance),
        }
    }

    // Test fixture: a minimal DocumentBlock with the given path and distance.
    fn make_doc_block(path: &str, distance: f32) -> DocumentBlock {
        DocumentBlock {
            path: path.to_string(),
            title: "Test".to_string(),
            content: format!("doc {}", path),
            context: vec![],
            level: 1,
            start_line: 1,
            end_line: 5,
            hash: format!("hash_{}", path),
            distance: Some(distance),
        }
    }

    // The main-index copy of an overridden path is replaced by the branch
    // copy; the merged list stays sorted by distance.
    #[test]
    fn test_merge_branch_code_blocks_filters_overridden() {
        let main = vec![
            make_code_block("src/a.rs", 0.1),
            make_code_block("src/b.rs", 0.2),
            make_code_block("src/c.rs", 0.3),
        ];
        let branch = vec![make_code_block("src/b.rs", 0.15)];
        let overridden: HashSet<&str> = ["src/b.rs"].into_iter().collect();
        let result = merge_branch_code_blocks(main, branch, &overridden, 10);
        assert_eq!(result.len(), 3);
        assert_eq!(result[0].path, "src/a.rs");
        assert_eq!(result[1].path, "src/b.rs");
        assert_eq!(result[2].path, "src/c.rs");
    }

    // A path overridden with no branch replacement (i.e. deleted) vanishes
    // entirely from the merged results.
    #[test]
    fn test_merge_branch_code_blocks_deleted_files_excluded() {
        let main = vec![
            make_code_block("src/a.rs", 0.1),
            make_code_block("src/deleted.rs", 0.2),
        ];
        let branch: Vec<CodeBlock> = vec![];
        let overridden: HashSet<&str> = ["src/deleted.rs"].into_iter().collect();
        let result = merge_branch_code_blocks(main, branch, &overridden, 10);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].path, "src/a.rs");
    }

    // The limit is applied after sorting, so the closest blocks win
    // regardless of which store they came from.
    #[test]
    fn test_merge_branch_code_blocks_respects_limit() {
        let main = vec![
            make_code_block("src/a.rs", 0.1),
            make_code_block("src/b.rs", 0.2),
            make_code_block("src/c.rs", 0.3),
        ];
        let branch = vec![make_code_block("src/d.rs", 0.05)];
        let overridden: HashSet<&str> = ["src/d.rs"].into_iter().collect();
        let result = merge_branch_code_blocks(main, branch, &overridden, 2);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].path, "src/d.rs");
        assert_eq!(result[1].path, "src/a.rs");
    }

    // With no branch results and nothing overridden, main passes through.
    #[test]
    fn test_merge_branch_code_blocks_empty_branch() {
        let main = vec![
            make_code_block("src/a.rs", 0.1),
            make_code_block("src/b.rs", 0.2),
        ];
        let branch: Vec<CodeBlock> = vec![];
        let overridden: HashSet<&str> = HashSet::new();
        let result = merge_branch_code_blocks(main, branch, &overridden, 10);
        assert_eq!(result.len(), 2);
    }

    // For an overridden path, only the branch copy survives — even when its
    // distance is better than the filtered main copy's.
    #[test]
    fn test_merge_branch_text_blocks() {
        let main = vec![make_text_block("docs/a.txt", 0.1)];
        let branch = vec![make_text_block("docs/a.txt", 0.05)];
        let overridden: HashSet<&str> = ["docs/a.txt"].into_iter().collect();
        let result = merge_branch_text_blocks(main, branch, &overridden, 10);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].distance, Some(0.05));
    }

    // The branch copy of README.md replaces the main copy (even at a worse
    // distance); untouched CHANGELOG.md is kept from the main index.
    #[test]
    fn test_merge_branch_doc_blocks() {
        let main = vec![
            make_doc_block("README.md", 0.1),
            make_doc_block("CHANGELOG.md", 0.3),
        ];
        let branch = vec![make_doc_block("README.md", 0.2)];
        let overridden: HashSet<&str> = ["README.md"].into_iter().collect();
        let result = merge_branch_doc_blocks(main, branch, &overridden, 10);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].path, "README.md");
        assert_eq!(result[0].distance, Some(0.2));
        assert_eq!(result[1].path, "CHANGELOG.md");
    }

    // End-to-end merge: changed src/x.rs comes from the branch store,
    // deleted src/old.rs is excluded, unchanged src/y.rs survives.
    // (Assumes manifest.overridden_paths() unions changed_paths and
    // deleted_paths — consistent with the assertions below.)
    #[test]
    fn test_merge_branch_query_results() {
        let manifest = BranchManifest {
            version: 1,
            branch_name: "feat".to_string(),
            base_branch: "main".to_string(),
            base_commit: "aaa".to_string(),
            branch_commit: "bbb".to_string(),
            changed_paths: vec!["src/x.rs".to_string()],
            deleted_paths: vec!["src/old.rs".to_string()],
            indexed_at: 0,
        };
        let main = QuerySearchResult {
            query_index: 0,
            code_blocks: vec![
                make_code_block("src/x.rs", 0.1),
                make_code_block("src/y.rs", 0.2),
                make_code_block("src/old.rs", 0.3),
            ],
            doc_blocks: vec![],
            text_blocks: vec![],
            commit_blocks: vec![],
        };
        let branch = QuerySearchResult {
            query_index: 0,
            code_blocks: vec![make_code_block("src/x.rs", 0.15)],
            doc_blocks: vec![],
            text_blocks: vec![],
            commit_blocks: vec![],
        };
        let merged = super::merge_branch_query_results(main, branch, &manifest, 10);
        assert_eq!(merged.code_blocks.len(), 2);
        assert_eq!(merged.code_blocks[0].path, "src/x.rs");
        assert_eq!(merged.code_blocks[0].distance, Some(0.15));
        assert_eq!(merged.code_blocks[1].path, "src/y.rs");
    }
}