use crate::config::Config;
use crate::store::{CodeBlock, Store};
use anyhow::Result;
use std::collections::HashSet;
/// Prints `blocks` to stdout using the default [`Config`] and the
/// `"partial"` detail level.
///
/// This is a convenience wrapper around [`render_code_blocks_with_config`].
pub fn render_code_blocks(blocks: &[CodeBlock]) {
    let default_config = Config::default();
    render_code_blocks_with_config(blocks, &default_config, "partial");
}
/// Pretty-prints code search results to stdout, one framed box per block.
///
/// For every block the header shows the path, its position in the result
/// list, language, line range, similarity (`1.0 - distance`, when a distance
/// is present) and the sorted, de-duplicated symbols that contain no
/// underscore. The content section depends on `detail_level`:
///
/// * `"signatures"` - only the first content line, trimmed.
/// * `"full"` - every line; when `config.search.search_block_max_characters`
///   is non-zero and the content is longer than that, the content is first
///   shortened with `crate::indexer::truncate_content_smartly`.
/// * anything else (including `"partial"`) - the whole content when it has
///   at most 10 lines, otherwise the first 4 lines, an omission marker and
///   the last 3 lines.
///
/// NOTE(review): the abridged view numbers lines from `start_line + 1`
/// while the other two views number from `start_line`; this mirrors the
/// existing behavior - confirm which base is intended.
pub fn render_code_blocks_with_config(blocks: &[CodeBlock], config: &Config, detail_level: &str) {
    /// Prints every line of `text`, numbering the first one `first_number`.
    fn print_numbered(text: &str, first_number: usize) {
        for (offset, line) in text.lines().enumerate() {
            println!("║ │ {:4} │ {}", first_number + offset, line);
        }
    }

    /// Prints short content whole; longer content as head, marker and tail.
    fn print_abridged(text: &str, start_line: usize) {
        let lines: Vec<&str> = text.lines().collect();
        let total = lines.len();

        if total <= 10 {
            for (offset, line) in lines.iter().enumerate() {
                println!("║ │ {:4} │ {}", start_line + offset + 1, line);
            }
            return;
        }

        for (offset, line) in lines[..4].iter().enumerate() {
            println!("║ │ {:4} │ {}", start_line + offset + 1, line);
        }

        // Four lines are shown at the top and three at the bottom.
        let omitted = total - 7;
        if omitted > 0 {
            println!("║ │ │ ... ({} more lines)", omitted);
        }

        let tail_start = total - 3;
        for (offset, line) in lines[tail_start..].iter().enumerate() {
            println!(
                "║ │ {:4} │ {}",
                start_line + tail_start + offset + 1,
                line
            );
        }
    }

    if blocks.is_empty() {
        println!("No code blocks found for the query.");
        return;
    }

    let total_blocks = blocks.len();
    println!("Found {} code blocks:\n", total_blocks);

    for (position, block) in blocks.iter().enumerate() {
        println!(
            "╔══════════════════ File: {} ══════════════════",
            block.path
        );
        println!("║");
        println!("║ Result {} of {}", position + 1, total_blocks);
        println!("║ Language: {}", block.language);
        println!("║ Lines: {}-{}", block.start_line, block.end_line);
        if let Some(distance) = block.distance {
            println!("║ Similarity: {:.4}", 1.0 - distance);
        }

        if !block.symbols.is_empty() {
            println!("║ Symbols:");
            let mut names = block.symbols.clone();
            names.sort();
            names.dedup();
            // Symbols containing an underscore are not listed.
            for name in names.iter().filter(|name| !name.contains("_")) {
                println!("║ • {}", name);
            }
        }

        println!("║ Content:");
        println!("║ ┌────────────────────────────────────");
        match detail_level {
            "signatures" => {
                if let Some(signature) = block.content.lines().next() {
                    println!("║ │ {:4} │ {}", block.start_line, signature.trim());
                }
            }
            "full" => {
                let limit = config.search.search_block_max_characters;
                if limit > 0 && block.content.len() > limit {
                    let (shortened, was_truncated) =
                        crate::indexer::truncate_content_smartly(&block.content, limit);
                    print_numbered(&shortened, block.start_line);
                    if was_truncated {
                        println!(
                            "║ │ │ [Content truncated - limit: {} chars]",
                            limit
                        );
                    }
                } else {
                    print_numbered(&block.content, block.start_line);
                }
            }
            // "partial" and every unrecognized level share the abridged view.
            _ => print_abridged(&block.content, block.start_line),
        }
        println!("║ └────────────────────────────────────");
        println!("╚════════════════════════════════════════\n");
    }
}
pub fn render_results_json(results: &[CodeBlock]) -> Result<(), anyhow::Error> {
let json = serde_json::to_string_pretty(results)?;
println!("{}", json);
Ok(())
}
/// Extends a result set with the blocks found for the symbols it mentions.
///
/// Every symbol of the input blocks that contains no underscore and starts
/// with an alphabetic character is looked up with
/// `Store::get_code_block_by_symbol`. Blocks returned by the store whose
/// hash is not already present (in the input or among earlier additions) are
/// appended after the original blocks. The additions are ordered by the
/// number of looked-up symbols they contain (descending), then by path, then
/// by start line.
///
/// The number of unique symbols being expanded is printed to stdout.
///
/// # Errors
/// Propagates any error returned by the store lookup.
pub async fn expand_symbols(
    store: &Store,
    code_blocks: Vec<CodeBlock>,
) -> Result<Vec<CodeBlock>, anyhow::Error> {
    // One set tracks both the original hashes and the ones added below, so
    // duplicate detection is a single hash lookup instead of a linear scan.
    let mut seen_hashes: HashSet<_> = code_blocks
        .iter()
        .map(|block| block.hash.clone())
        .collect();

    let mut symbol_refs: Vec<_> = code_blocks
        .iter()
        .flat_map(|block| block.symbols.iter())
        .filter(|symbol| {
            !symbol.contains("_") && symbol.chars().next().is_some_and(|c| c.is_alphabetic())
        })
        .cloned()
        .collect();
    symbol_refs.sort();
    symbol_refs.dedup();
    println!("Found {} unique symbols to expand", symbol_refs.len());

    let mut additional_blocks: Vec<CodeBlock> = Vec::new();
    for symbol in &symbol_refs {
        if let Some(block) = store.get_code_block_by_symbol(symbol).await? {
            // `insert` returns false when the hash was already known.
            if seen_hashes.insert(block.hash.clone()) {
                additional_blocks.push(block);
            }
        }
    }

    // `symbol_refs` is sorted and de-duplicated, so membership can be
    // answered with a binary search rather than `Vec::contains`.
    let match_count = |block: &CodeBlock| {
        block
            .symbols
            .iter()
            .filter(|symbol| symbol_refs.binary_search(symbol).is_ok())
            .count()
    };
    additional_blocks.sort_by(|a, b| {
        match_count(b)
            .cmp(&match_count(a))
            .then_with(|| a.path.cmp(&b.path))
            .then_with(|| a.start_line.cmp(&b.start_line))
    });

    // The input is owned, so it is reused as the output instead of cloned.
    let mut expanded_blocks = code_blocks;
    expanded_blocks.extend(additional_blocks);
    Ok(expanded_blocks)
}
/// Runs a single-query search and renders the hits as Markdown.
///
/// `mode` selects what is queried: `"code"`, `"text"`, `"docs"` or `"all"`.
/// In `"all"` mode each of the three collections is asked for
/// `max_results.div_ceil(3)` hits. `detail_level` is only forwarded to the
/// code formatter; text and documentation hits use the plain Markdown
/// formatters.
///
/// NOTE(review): `config.search.similarity_threshold` is handed to the store
/// unchanged here, while `search_codebase_with_details_text` passes
/// `1.0 - similarity_threshold` to the same store methods - confirm which
/// convention the store expects.
///
/// # Errors
/// Fails when the store cannot be opened, embedding generation fails, the
/// embeddings required by `mode` are absent, a store query fails, or `mode`
/// is not one of the four supported values.
pub async fn search_codebase_with_details(
    query: &str,
    mode: &str,
    detail_level: &str,
    max_results: usize,
    config: &Config,
) -> Result<String> {
    let store = Store::new().await?;
    let search_embeddings =
        crate::embedding::generate_search_embeddings(query, mode, config).await?;
    let threshold = Some(config.search.similarity_threshold);

    let rendered = match mode {
        "code" => {
            let Some(embeddings) = search_embeddings.code_embeddings else {
                return Err(anyhow::anyhow!(
                    "No code embeddings generated for code search mode"
                ));
            };
            let hits = store
                .get_code_blocks_with_config(embeddings, Some(max_results), threshold)
                .await?;
            format_code_search_results_with_detail(&hits, detail_level)
        }
        "text" => {
            let Some(embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for text search mode"
                ));
            };
            let hits = store
                .get_text_blocks_with_config(embeddings, Some(max_results), threshold)
                .await?;
            format_text_search_results_as_markdown(&hits)
        }
        "docs" => {
            let Some(embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for docs search mode"
                ));
            };
            let hits = store
                .get_document_blocks_with_config(embeddings, Some(max_results), threshold)
                .await?;
            format_doc_search_results_as_markdown(&hits)
        }
        "all" => {
            let Some(code_embeddings) = search_embeddings.code_embeddings else {
                return Err(anyhow::anyhow!(
                    "No code embeddings generated for all search mode"
                ));
            };
            let Some(text_embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for all search mode"
                ));
            };

            // Split the requested total across the three collections.
            let per_type = Some(max_results.div_ceil(3));
            let code_hits = store
                .get_code_blocks_with_config(code_embeddings, per_type, threshold)
                .await?;
            let text_hits = store
                .get_text_blocks_with_config(text_embeddings.clone(), per_type, threshold)
                .await?;
            let doc_hits = store
                .get_document_blocks_with_config(text_embeddings, per_type, threshold)
                .await?;
            format_combined_search_results_with_detail(
                &code_hits,
                &text_hits,
                &doc_hits,
                detail_level,
            )
        }
        _ => {
            return Err(anyhow::anyhow!(
                "Invalid search mode '{}'. Use 'all', 'code', 'docs', or 'text'.",
                mode
            ));
        }
    };

    Ok(rendered)
}
/// Runs a single-query search with the limits from `config` and renders the
/// hits as Markdown.
///
/// `mode` is one of `"code"`, `"text"`, `"docs"` or `"all"`. Every store
/// query uses `config.search.max_results` as the limit and
/// `config.search.similarity_threshold` as the threshold; in `"all"` mode
/// each of the three collections is queried with that full limit.
///
/// # Errors
/// Fails when the store cannot be opened, embedding generation fails, the
/// embeddings required by `mode` are absent, a store query fails, or `mode`
/// is not one of the four supported values.
pub async fn search_codebase(query: &str, mode: &str, config: &Config) -> Result<String> {
    let store = Store::new().await?;
    let search_embeddings =
        crate::embedding::generate_search_embeddings(query, mode, config).await?;
    let limit = Some(config.search.max_results);
    let threshold = Some(config.search.similarity_threshold);

    let rendered = match mode {
        "code" => {
            let Some(embeddings) = search_embeddings.code_embeddings else {
                return Err(anyhow::anyhow!(
                    "No code embeddings generated for code search mode"
                ));
            };
            let hits = store
                .get_code_blocks_with_config(embeddings, limit, threshold)
                .await?;
            format_code_search_results_as_markdown(&hits)
        }
        "text" => {
            let Some(embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for text search mode"
                ));
            };
            let hits = store
                .get_text_blocks_with_config(embeddings, limit, threshold)
                .await?;
            format_text_search_results_as_markdown(&hits)
        }
        "docs" => {
            let Some(embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for docs search mode"
                ));
            };
            let hits = store
                .get_document_blocks_with_config(embeddings, limit, threshold)
                .await?;
            format_doc_search_results_as_markdown(&hits)
        }
        "all" => {
            let Some(code_embeddings) = search_embeddings.code_embeddings else {
                return Err(anyhow::anyhow!(
                    "No code embeddings generated for all search mode"
                ));
            };
            let Some(text_embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for all search mode"
                ));
            };
            let code_hits = store
                .get_code_blocks_with_config(code_embeddings, limit, threshold)
                .await?;
            let text_hits = store
                .get_text_blocks_with_config(text_embeddings.clone(), limit, threshold)
                .await?;
            let doc_hits = store
                .get_document_blocks_with_config(text_embeddings, limit, threshold)
                .await?;
            format_combined_search_results_as_markdown(&code_hits, &text_hits, &doc_hits)
        }
        _ => {
            return Err(anyhow::anyhow!(
                "Invalid search mode '{}'. Use 'all', 'code', 'docs', or 'text'.",
                mode
            ));
        }
    };

    Ok(rendered)
}
/// Renders code search hits as Markdown, grouped by file.
///
/// Paths are passed through `ensure_relative_path` before grouping. Files
/// appear in the order of their first block in `blocks`, and blocks within a
/// file keep their input order, so the output is deterministic and follows
/// the order the caller supplied. (Iterating a `HashMap` directly would
/// yield an arbitrary file order.)
///
/// With `detail_level == "signatures"` the fenced code section contains
/// `get_code_preview` of the block; every other level emits the complete
/// block content.
///
/// Returns `"No code results found."` when `blocks` is empty.
fn format_code_search_results_with_detail(blocks: &[CodeBlock], detail_level: &str) -> String {
    if blocks.is_empty() {
        return "No code results found.".to_string();
    }

    let mut output = format!(
        "# Code Search Results\n\nFound {} code blocks:\n\n",
        blocks.len()
    );

    // `groups` keeps first-appearance order; the map only locates the slot
    // of a path that has already been seen.
    let mut groups: Vec<(String, Vec<&CodeBlock>)> = Vec::new();
    let mut slot_by_path: std::collections::HashMap<String, usize> =
        std::collections::HashMap::new();
    for block in blocks {
        let display_path = ensure_relative_path(&block.path);
        let slot = match slot_by_path.get(&display_path) {
            Some(&slot) => slot,
            None => {
                let slot = groups.len();
                slot_by_path.insert(display_path.clone(), slot);
                groups.push((display_path, Vec::new()));
                slot
            }
        };
        groups[slot].1.push(block);
    }

    for (file_path, file_blocks) in &groups {
        output.push_str(&format!("## File: {}\n\n", file_path));
        for (idx, block) in file_blocks.iter().enumerate() {
            output.push_str(&format!(
                "### Block {} of {} in file\n\n",
                idx + 1,
                file_blocks.len()
            ));
            output.push_str(&format!("- **Language**: {}\n", block.language));
            output.push_str(&format!(
                "- **Lines**: {}-{}\n",
                block.start_line, block.end_line
            ));
            if let Some(distance) = block.distance {
                output.push_str(&format!("- **Similarity**: {:.4}\n", 1.0 - distance));
            }

            if !block.symbols.is_empty() {
                output.push_str("- **Symbols**: ");
                // Symbols containing an underscore are not listed.
                let mut relevant_symbols: Vec<&str> = block
                    .symbols
                    .iter()
                    .map(String::as_str)
                    .filter(|symbol| !symbol.contains("_"))
                    .collect();
                relevant_symbols.sort();
                relevant_symbols.dedup();
                output.push_str(&relevant_symbols.join(", "));
                output.push('\n');
            }

            output.push_str("\n**Content:**\n\n");
            let preview;
            let body: &str = if detail_level == "signatures" {
                preview = get_code_preview(&block.content, &block.language);
                &preview
            } else {
                &block.content
            };
            output.push_str("```");
            output.push_str(&block.language);
            output.push('\n');
            output.push_str(body);
            output.push_str("\n```\n\n");
        }
    }

    output
}
/// Renders code search hits as Markdown, grouped by file, with the complete
/// content of every block in a fenced code section.
///
/// Files appear in the order of their first block in `blocks`, and blocks
/// within a file keep their input order, so the output is deterministic.
/// (Iterating a `HashMap` directly would yield an arbitrary file order.)
///
/// Returns `"No code results found."` when `blocks` is empty.
fn format_code_search_results_as_markdown(blocks: &[CodeBlock]) -> String {
    if blocks.is_empty() {
        return "No code results found.".to_string();
    }

    let mut output = format!(
        "# Code Search Results\n\nFound {} code blocks:\n\n",
        blocks.len()
    );

    // `groups` keeps first-appearance order; the map only locates the slot
    // of a path that has already been seen.
    let mut groups: Vec<(&str, Vec<&CodeBlock>)> = Vec::new();
    let mut slot_by_path: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::new();
    for block in blocks {
        let path = block.path.as_str();
        let slot = *slot_by_path.entry(path).or_insert_with(|| {
            groups.push((path, Vec::new()));
            groups.len() - 1
        });
        groups[slot].1.push(block);
    }

    for (file_path, file_blocks) in &groups {
        output.push_str(&format!("## File: {}\n\n", file_path));
        for (idx, block) in file_blocks.iter().enumerate() {
            output.push_str(&format!(
                "### Block {} of {} in file\n\n",
                idx + 1,
                file_blocks.len()
            ));
            output.push_str(&format!("- **Language**: {}\n", block.language));
            output.push_str(&format!(
                "- **Lines**: {}-{}\n",
                block.start_line, block.end_line
            ));
            if let Some(distance) = block.distance {
                output.push_str(&format!("- **Similarity**: {:.4}\n", 1.0 - distance));
            }

            if !block.symbols.is_empty() {
                output.push_str("- **Symbols**: ");
                // Symbols containing an underscore are not listed.
                let mut relevant_symbols: Vec<&str> = block
                    .symbols
                    .iter()
                    .map(String::as_str)
                    .filter(|symbol| !symbol.contains("_"))
                    .collect();
                relevant_symbols.sort();
                relevant_symbols.dedup();
                output.push_str(&relevant_symbols.join(", "));
                output.push('\n');
            }

            output.push_str("\n**Content:**\n\n");
            output.push_str("```");
            output.push_str(&block.language);
            output.push('\n');
            output.push_str(&block.content);
            output.push_str("\n```\n\n");
        }
    }

    output
}
/// Renders text search hits as Markdown, grouped by file, with the content
/// of every block in a fenced section tagged with the block's language.
///
/// Files appear in the order of their first block in `blocks`, and blocks
/// within a file keep their input order, so the output is deterministic.
/// (Iterating a `HashMap` directly would yield an arbitrary file order.)
///
/// Returns `"No text results found."` when `blocks` is empty.
fn format_text_search_results_as_markdown(blocks: &[crate::store::TextBlock]) -> String {
    if blocks.is_empty() {
        return "No text results found.".to_string();
    }

    let mut output = format!(
        "# Text Search Results\n\nFound {} text blocks:\n\n",
        blocks.len()
    );

    // `groups` keeps first-appearance order; the map only locates the slot
    // of a path that has already been seen.
    let mut groups: Vec<(&str, Vec<&crate::store::TextBlock>)> = Vec::new();
    let mut slot_by_path: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::new();
    for block in blocks {
        let path = block.path.as_str();
        let slot = *slot_by_path.entry(path).or_insert_with(|| {
            groups.push((path, Vec::new()));
            groups.len() - 1
        });
        groups[slot].1.push(block);
    }

    for (file_path, file_blocks) in &groups {
        output.push_str(&format!("## File: {}\n\n", file_path));
        for (idx, block) in file_blocks.iter().enumerate() {
            output.push_str(&format!(
                "### Block {} of {} in file\n\n",
                idx + 1,
                file_blocks.len()
            ));
            output.push_str(&format!("- **Language**: {}\n", block.language));
            output.push_str(&format!(
                "- **Lines**: {}-{}\n",
                block.start_line, block.end_line
            ));
            if let Some(distance) = block.distance {
                output.push_str(&format!("- **Similarity**: {:.4}\n", 1.0 - distance));
            }
            output.push_str("\n**Content:**\n\n");
            output.push_str("```");
            output.push_str(&block.language);
            output.push('\n');
            output.push_str(&block.content);
            output.push_str("\n```\n\n");
        }
    }

    output
}
/// Renders documentation search hits as Markdown, grouped by file.
///
/// Each section lists its title, level, line range and (when a distance is
/// present) similarity, followed by the section content inserted verbatim.
///
/// Files appear in the order of their first block in `blocks`, and sections
/// within a file keep their input order, so the output is deterministic.
/// (Iterating a `HashMap` directly would yield an arbitrary file order.)
///
/// Returns `"No documentation results found."` when `blocks` is empty.
fn format_doc_search_results_as_markdown(blocks: &[crate::store::DocumentBlock]) -> String {
    if blocks.is_empty() {
        return "No documentation results found.".to_string();
    }

    let mut output = format!(
        "# Documentation Search Results\n\nFound {} documentation sections:\n\n",
        blocks.len()
    );

    // `groups` keeps first-appearance order; the map only locates the slot
    // of a path that has already been seen.
    let mut groups: Vec<(&str, Vec<&crate::store::DocumentBlock>)> = Vec::new();
    let mut slot_by_path: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::new();
    for block in blocks {
        let path = block.path.as_str();
        let slot = *slot_by_path.entry(path).or_insert_with(|| {
            groups.push((path, Vec::new()));
            groups.len() - 1
        });
        groups[slot].1.push(block);
    }

    for (file_path, file_blocks) in &groups {
        output.push_str(&format!("## File: {}\n\n", file_path));
        for (idx, block) in file_blocks.iter().enumerate() {
            output.push_str(&format!(
                "### Section {} of {} in file\n\n",
                idx + 1,
                file_blocks.len()
            ));
            output.push_str(&format!("- **Title**: {}\n", block.title));
            output.push_str(&format!("- **Level**: {}\n", block.level));
            output.push_str(&format!(
                "- **Lines**: {}-{}\n",
                block.start_line, block.end_line
            ));
            if let Some(distance) = block.distance {
                output.push_str(&format!("- **Similarity**: {:.4}\n", 1.0 - distance));
            }
            output.push_str("\n**Content:**\n\n");
            output.push_str(&block.content);
            output.push_str("\n\n");
        }
    }

    output
}
/// Renders code search hits as compact plain text.
///
/// Each hit starts with `"<n>. <path>"`, followed by a line that carries the
/// similarity (`1.0 - distance`) when a distance is present, and a
/// `Symbols:` line listing the sorted, de-duplicated symbols that contain no
/// underscore (omitted when none remain). The content depends on
/// `detail_level`:
///
/// * `"signatures"` - first line of `get_code_preview_with_lines`.
/// * `"partial"` - the whole `get_code_preview_with_lines` output.
/// * `"full"` - every content line prefixed with its line number, counted
///   from `start_line`.
/// * anything else - no content.
///
/// A blank line terminates every hit. Returns `"No code results found."`
/// when `blocks` is empty.
pub fn format_code_search_results_as_text(blocks: &[CodeBlock], detail_level: &str) -> String {
    if blocks.is_empty() {
        return "No code results found.".to_string();
    }

    let mut output = format!("CODE RESULTS ({})\n", blocks.len());

    for (position, block) in blocks.iter().enumerate() {
        output += &format!("{}. {}\n", position + 1, block.path);
        if let Some(distance) = block.distance {
            output += &format!(" | Similarity {:.3}", 1.0 - distance);
        }
        output.push('\n');

        let mut relevant_symbols: Vec<&str> = block
            .symbols
            .iter()
            .map(String::as_str)
            .filter(|symbol| !symbol.contains("_"))
            .collect();
        relevant_symbols.sort();
        relevant_symbols.dedup();
        if !relevant_symbols.is_empty() {
            output += &format!("Symbols: {}\n", relevant_symbols.join(", "));
        }

        match detail_level {
            level @ ("signatures" | "partial") => {
                let preview =
                    get_code_preview_with_lines(&block.content, block.start_line, &block.language);
                if level == "partial" {
                    output.push_str(&preview);
                    if !preview.ends_with('\n') {
                        output.push('\n');
                    }
                } else if let Some(first_line) = preview.lines().next() {
                    output += &format!("{}\n", first_line);
                }
            }
            "full" => {
                let mut numbered = String::new();
                for (offset, line) in block.content.lines().enumerate() {
                    if offset > 0 {
                        numbered.push('\n');
                    }
                    numbered += &format!("{}: {}", block.start_line + offset, line);
                }
                output.push_str(&numbered);
                if !numbered.ends_with('\n') {
                    output.push('\n');
                }
            }
            _ => {}
        }

        output.push('\n');
    }

    output
}
/// Renders text search hits as compact plain text.
///
/// Each hit starts with `"<n>. <path>"`, followed by a line that carries the
/// similarity (`1.0 - distance`) when a distance is present. The content
/// depends on `detail_level`:
///
/// * `"signatures"` - first line of `get_text_preview_with_lines`.
/// * `"partial"` - the whole `get_text_preview_with_lines` output.
/// * `"full"` - every content line prefixed with its line number, counted
///   from `start_line`.
/// * anything else - no content.
///
/// A blank line terminates every hit. Returns `"No text results found."`
/// when `blocks` is empty.
pub fn format_text_search_results_as_text(
    blocks: &[crate::store::TextBlock],
    detail_level: &str,
) -> String {
    if blocks.is_empty() {
        return "No text results found.".to_string();
    }

    let mut output = format!("TEXT RESULTS ({})\n", blocks.len());

    for (position, block) in blocks.iter().enumerate() {
        output += &format!("{}. {}\n", position + 1, block.path);
        if let Some(distance) = block.distance {
            output += &format!(" | Similarity {:.3}", 1.0 - distance);
        }
        output.push('\n');

        match detail_level {
            level @ ("signatures" | "partial") => {
                let preview = get_text_preview_with_lines(&block.content, block.start_line);
                if level == "partial" {
                    output.push_str(&preview);
                    if !preview.ends_with('\n') {
                        output.push('\n');
                    }
                } else if let Some(first_line) = preview.lines().next() {
                    output += &format!("{}\n", first_line);
                }
            }
            "full" => {
                let mut numbered = String::new();
                for (offset, line) in block.content.lines().enumerate() {
                    if offset > 0 {
                        numbered.push('\n');
                    }
                    numbered += &format!("{}: {}", block.start_line + offset, line);
                }
                output.push_str(&numbered);
                if !numbered.ends_with('\n') {
                    output.push('\n');
                }
            }
            _ => {}
        }

        output.push('\n');
    }

    output
}
/// Renders documentation search hits as compact plain text.
///
/// Each hit starts with `"<n>. <path>"`, followed by a line with the title,
/// level, line range and (when a distance is present) similarity. The
/// content depends on `detail_level`:
///
/// * `"signatures"` - first line of `get_doc_preview_with_lines`.
/// * `"partial"` - the whole `get_doc_preview_with_lines` output.
/// * `"full"` - every content line prefixed with its line number, counted
///   from `start_line`.
/// * anything else - no content.
///
/// Unlike the code and text variants, no blank line is appended after a
/// hit. Returns `"No documentation results found."` when `blocks` is empty.
pub fn format_doc_search_results_as_text(
    blocks: &[crate::store::DocumentBlock],
    detail_level: &str,
) -> String {
    if blocks.is_empty() {
        return "No documentation results found.".to_string();
    }

    let mut output = format!("DOCUMENTATION RESULTS ({})\n", blocks.len());

    for (position, block) in blocks.iter().enumerate() {
        output += &format!("{}. {}\n", position + 1, block.path);
        output += &format!("{} (Level {})", block.title, block.level);
        output += &format!(" | {}-{}", block.start_line, block.end_line);
        if let Some(distance) = block.distance {
            output += &format!(" | Similarity {:.3}", 1.0 - distance);
        }
        output.push('\n');

        match detail_level {
            level @ ("signatures" | "partial") => {
                let preview = get_doc_preview_with_lines(&block.content, block.start_line);
                if level == "partial" {
                    output.push_str(&preview);
                    if !preview.ends_with('\n') {
                        output.push('\n');
                    }
                } else if let Some(first_line) = preview.lines().next() {
                    output += &format!("{}\n", first_line);
                }
            }
            "full" => {
                let mut numbered = String::new();
                for (offset, line) in block.content.lines().enumerate() {
                    if offset > 0 {
                        numbered.push('\n');
                    }
                    numbered += &format!("{}: {}", block.start_line + offset, line);
                }
                output.push_str(&numbered);
                if !numbered.ends_with('\n') {
                    output.push('\n');
                }
            }
            _ => {}
        }
    }

    output
}
/// Renders code, text and documentation hits as one plain-text report.
///
/// After a header with the total count, the non-empty sections are emitted
/// in the order documentation, code, text. The documentation and code
/// sections are each followed by an extra newline; the text section is not.
/// Returns `"No results found."` when all three slices are empty.
pub fn format_combined_search_results_as_text(
    code_blocks: &[CodeBlock],
    text_blocks: &[crate::store::TextBlock],
    doc_blocks: &[crate::store::DocumentBlock],
    detail_level: &str,
) -> String {
    let total_results: usize = [code_blocks.len(), text_blocks.len(), doc_blocks.len()]
        .iter()
        .sum();
    if total_results == 0 {
        return "No results found.".to_string();
    }

    let mut output = format!("SEARCH RESULTS ({} total)\n\n", total_results);

    if !doc_blocks.is_empty() {
        output += &format_doc_search_results_as_text(doc_blocks, detail_level);
        output.push('\n');
    }
    if !code_blocks.is_empty() {
        output += &format_code_search_results_as_text(code_blocks, detail_level);
        output.push('\n');
    }
    if !text_blocks.is_empty() {
        output += &format_text_search_results_as_text(text_blocks, detail_level);
    }

    output
}
/// Renders code, text and documentation hits as one Markdown report.
///
/// After the title and total count, the non-empty sections are emitted in
/// the order documentation, code, text. Only the code section honors
/// `detail_level`. The documentation and code sections are each followed by
/// an extra newline. Returns `"No results found."` (without the title) when
/// all three slices are empty.
fn format_combined_search_results_with_detail(
    code_blocks: &[CodeBlock],
    text_blocks: &[crate::store::TextBlock],
    doc_blocks: &[crate::store::DocumentBlock],
    detail_level: &str,
) -> String {
    let total_results = code_blocks.len() + text_blocks.len() + doc_blocks.len();
    if total_results == 0 {
        return "No results found.".to_string();
    }

    let mut output = String::from("# Combined Search Results\n\n");
    output += &format!("Found {} total results:\n\n", total_results);

    if !doc_blocks.is_empty() {
        output += &format_doc_search_results_as_markdown(doc_blocks);
        output.push('\n');
    }
    if !code_blocks.is_empty() {
        output += &format_code_search_results_with_detail(code_blocks, detail_level);
        output.push('\n');
    }
    if !text_blocks.is_empty() {
        output += &format_text_search_results_as_markdown(text_blocks);
    }

    output
}
/// Renders code, text and documentation hits as one Markdown report with
/// the complete content of every block.
///
/// After the title and total count, the non-empty sections are emitted in
/// the order documentation, code, text. The documentation and code sections
/// are each followed by an extra newline. Returns `"No results found."`
/// (without the title) when all three slices are empty.
fn format_combined_search_results_as_markdown(
    code_blocks: &[CodeBlock],
    text_blocks: &[crate::store::TextBlock],
    doc_blocks: &[crate::store::DocumentBlock],
) -> String {
    let total_results = code_blocks.len() + text_blocks.len() + doc_blocks.len();
    if total_results == 0 {
        return "No results found.".to_string();
    }

    let mut output = String::from("# Combined Search Results\n\n");
    output += &format!("Found {} total results:\n\n", total_results);

    if !doc_blocks.is_empty() {
        output += &format_doc_search_results_as_markdown(doc_blocks);
        output.push('\n');
    }
    if !code_blocks.is_empty() {
        output += &format_code_search_results_as_markdown(code_blocks);
        output.push('\n');
    }
    if !text_blocks.is_empty() {
        output += &format_text_search_results_as_markdown(text_blocks);
    }

    output
}
/// Runs a single-query search and renders the hits as compact plain text.
///
/// `mode` is one of `"code"`, `"text"`, `"docs"` or `"all"`. The store is
/// queried with `1.0 - similarity_threshold` as the threshold. Code queries
/// additionally receive `language_filter`. In `"all"` mode each of the three
/// collections is asked for `max_results.div_ceil(3)` hits.
///
/// # Errors
/// Fails when the store cannot be opened, embedding generation fails, the
/// embeddings required by `mode` are absent, a store query fails, or `mode`
/// is not one of the four supported values.
pub async fn search_codebase_with_details_text(
    query: &str,
    mode: &str,
    detail_level: &str,
    max_results: usize,
    similarity_threshold: f32,
    language_filter: Option<&str>,
    config: &Config,
) -> Result<String> {
    let store = Store::new().await?;
    let search_embeddings =
        crate::embedding::generate_search_embeddings(query, mode, config).await?;
    // The store calls below take a distance, not a similarity.
    let threshold = Some(1.0 - similarity_threshold);

    let rendered = match mode {
        "code" => {
            let Some(embeddings) = search_embeddings.code_embeddings else {
                return Err(anyhow::anyhow!(
                    "No code embeddings generated for code search mode"
                ));
            };
            let hits = store
                .get_code_blocks_with_language_filter(
                    embeddings,
                    Some(max_results),
                    threshold,
                    language_filter,
                )
                .await?;
            format_code_search_results_as_text(&hits, detail_level)
        }
        "text" => {
            let Some(embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for text search mode"
                ));
            };
            let hits = store
                .get_text_blocks_with_config(embeddings, Some(max_results), threshold)
                .await?;
            format_text_search_results_as_text(&hits, detail_level)
        }
        "docs" => {
            let Some(embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for docs search mode"
                ));
            };
            let hits = store
                .get_document_blocks_with_config(embeddings, Some(max_results), threshold)
                .await?;
            format_doc_search_results_as_text(&hits, detail_level)
        }
        "all" => {
            let Some(code_embeddings) = search_embeddings.code_embeddings else {
                return Err(anyhow::anyhow!(
                    "No code embeddings generated for all search mode"
                ));
            };
            let Some(text_embeddings) = search_embeddings.text_embeddings else {
                return Err(anyhow::anyhow!(
                    "No text embeddings generated for all search mode"
                ));
            };

            // Split the requested total across the three collections.
            let per_type = Some(max_results.div_ceil(3));
            let code_hits = store
                .get_code_blocks_with_language_filter(
                    code_embeddings,
                    per_type,
                    threshold,
                    language_filter,
                )
                .await?;
            let text_hits = store
                .get_text_blocks_with_config(text_embeddings.clone(), per_type, threshold)
                .await?;
            let doc_hits = store
                .get_document_blocks_with_config(text_embeddings, per_type, threshold)
                .await?;
            format_combined_search_results_as_text(
                &code_hits,
                &text_hits,
                &doc_hits,
                detail_level,
            )
        }
        _ => {
            return Err(anyhow::anyhow!(
                "Invalid search mode '{}'. Use 'all', 'code', 'docs', or 'text'.",
                mode
            ));
        }
    };

    Ok(rendered)
}
/// Runs several queries at once, merges the hits and renders them as
/// compact plain text.
///
/// The queries are embedded in one batch, searched via
/// `execute_parallel_searches`, merged by `deduplicate_and_merge_results`
/// and each result list is truncated to `max_results` before formatting
/// according to `mode` (`"code"`, `"text"`, `"docs"` or `"all"`).
///
/// Arguments are validated before the store is opened, so an invalid call
/// fails without touching storage.
///
/// NOTE(review): `execute_parallel_searches` receives the similarity
/// threshold while `deduplicate_and_merge_results` receives
/// `1.0 - similarity_threshold`; this mirrors the existing call sites -
/// confirm against those helpers.
///
/// # Errors
/// Fails when `queries` is empty or longer than
/// `octolib::embedding::constants::MAX_QUERIES`, when the store cannot be
/// opened, when embedding generation or searching fails, or when `mode` is
/// not one of the four supported values.
pub async fn search_codebase_with_details_multi_query_text(
    queries: &[String],
    mode: &str,
    detail_level: &str,
    max_results: usize,
    similarity_threshold: f32,
    language_filter: Option<&str>,
    config: &Config,
) -> Result<String> {
    if queries.is_empty() {
        return Err(anyhow::anyhow!("At least one query is required"));
    }
    // Report the same constant that is enforced, so the message cannot
    // disagree with the actual limit.
    let max_queries = octolib::embedding::constants::MAX_QUERIES;
    if queries.len() > max_queries {
        return Err(anyhow::anyhow!(
            "Maximum {} queries allowed, got {}. Use fewer, more specific terms.",
            max_queries,
            queries.len()
        ));
    }

    let store = Store::new().await?;
    let embeddings = generate_batch_embeddings_for_queries(queries, mode, config).await?;
    let query_embeddings: Vec<_> = queries.iter().cloned().zip(embeddings).collect();

    let distance_threshold = 1.0 - similarity_threshold;
    let search_results = execute_parallel_searches(
        &store,
        query_embeddings,
        mode,
        max_results,
        similarity_threshold,
        language_filter,
    )
    .await?;

    let (mut code_blocks, mut doc_blocks, mut text_blocks) =
        deduplicate_and_merge_results(search_results, queries, distance_threshold);
    code_blocks.truncate(max_results);
    doc_blocks.truncate(max_results);
    text_blocks.truncate(max_results);

    match mode {
        "code" => Ok(format_code_search_results_as_text(
            &code_blocks,
            detail_level,
        )),
        "text" => Ok(format_text_search_results_as_text(
            &text_blocks,
            detail_level,
        )),
        "docs" => Ok(format_doc_search_results_as_text(&doc_blocks, detail_level)),
        "all" => Ok(format_combined_search_results_as_text(
            &code_blocks,
            &text_blocks,
            &doc_blocks,
            detail_level,
        )),
        _ => Err(anyhow::anyhow!(
            "Invalid search mode '{}'. Use 'all', 'code', 'docs', or 'text'.",
            mode
        )),
    }
}
/// Builds a shortened preview of a code block without line numbers.
///
/// Content of at most 10 lines is returned unchanged. Longer content is
/// reduced to four lines starting at the first line that is neither blank
/// nor begins with one of the recognized comment prefixes (index 0 when no
/// such line exists). If more than three lines follow, an omission marker
/// and the last three lines are appended; otherwise the remaining lines are
/// appended as they are. `_language` is currently unused.
fn get_code_preview(content: &str, _language: &str) -> String {
    const HEAD_LINES: usize = 4;
    const TAIL_LINES: usize = 3;
    const COMMENT_PREFIXES: [&str; 10] = [
        "//", "#", "/*", "*", "<!--", "--", "%", ";", "\"\"\"", "'''",
    ];

    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    if total <= 10 {
        return content.to_string();
    }

    let first_code_line = lines
        .iter()
        .position(|line| {
            let trimmed = line.trim();
            !trimmed.is_empty()
                && !COMMENT_PREFIXES
                    .iter()
                    .any(|prefix| trimmed.starts_with(*prefix))
        })
        .unwrap_or(0);

    let head_end = (first_code_line + HEAD_LINES).min(total);
    let mut preview: Vec<&str> = lines[first_code_line..head_end].to_vec();

    if head_end < total {
        let remaining = total - head_end;
        if remaining > TAIL_LINES {
            preview.push("// ... [content omitted] ...");
            preview.extend_from_slice(&lines[total - TAIL_LINES..]);
        } else {
            preview.extend_from_slice(&lines[head_end..]);
        }
    }

    preview.join("\n")
}
/// Builds a line-numbered preview of a text block.
///
/// Every emitted line has the form `"<number>: <text>"`, where the number is
/// `start_line` plus the line's index within `content`.
///
/// Content of at most 10 lines is emitted in full. Longer content is reduced
/// to four lines starting at the first non-blank line (index 0 when every
/// line is blank). If more than three lines follow, a
/// `"... (N more lines)"` marker and the last three lines are appended,
/// where `N` is the number of lines actually hidden between the head and the
/// tail; otherwise the remaining lines are appended as they are.
fn get_text_preview_with_lines(content: &str, start_line: usize) -> String {
    const HEAD_LINES: usize = 4;
    const TAIL_LINES: usize = 3;

    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let numbered = |idx: usize| format!("{}: {}", start_line + idx, lines[idx]);

    if total <= 10 {
        return (0..total).map(&numbered).collect::<Vec<_>>().join("\n");
    }

    let first_text_line = lines
        .iter()
        .position(|line| !line.trim().is_empty())
        .unwrap_or(0);

    let head_end = (first_text_line + HEAD_LINES).min(total);
    let mut result: Vec<String> = (first_text_line..head_end).map(&numbered).collect();

    if head_end < total {
        let remaining = total - head_end;
        if remaining > TAIL_LINES {
            // The tail lines are printed right below the marker, so they
            // must not be counted as hidden.
            result.push(format!("... ({} more lines)", remaining - TAIL_LINES));
            result.extend((total - TAIL_LINES..total).map(&numbered));
        } else {
            result.extend((head_end..total).map(&numbered));
        }
    }

    result.join("\n")
}
/// Builds a line-numbered preview of a documentation block.
///
/// Every emitted line has the form `"<number>: <text>"`, where the number is
/// `start_line` plus the line's index within `content`.
///
/// Content of at most 10 lines is emitted in full. Longer content is reduced
/// to four lines starting at the first non-blank line (index 0 when every
/// line is blank). If more than three lines follow, a
/// `"... (N more lines)"` marker and the last three lines are appended,
/// where `N` is the number of lines actually hidden between the head and the
/// tail; otherwise the remaining lines are appended as they are.
fn get_doc_preview_with_lines(content: &str, start_line: usize) -> String {
    const HEAD_LINES: usize = 4;
    const TAIL_LINES: usize = 3;

    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let numbered = |idx: usize| format!("{}: {}", start_line + idx, lines[idx]);

    if total <= 10 {
        return (0..total).map(&numbered).collect::<Vec<_>>().join("\n");
    }

    let first_text_line = lines
        .iter()
        .position(|line| !line.trim().is_empty())
        .unwrap_or(0);

    let head_end = (first_text_line + HEAD_LINES).min(total);
    let mut result: Vec<String> = (first_text_line..head_end).map(&numbered).collect();

    if head_end < total {
        let remaining = total - head_end;
        if remaining > TAIL_LINES {
            // The tail lines are printed right below the marker, so they
            // must not be counted as hidden.
            result.push(format!("... ({} more lines)", remaining - TAIL_LINES));
            result.extend((total - TAIL_LINES..total).map(&numbered));
        } else {
            result.extend((head_end..total).map(&numbered));
        }
    }

    result.join("\n")
}
/// Builds a line-numbered preview of a code block.
///
/// Every emitted line has the form `"<number>: <text>"`, where the number is
/// `start_line` plus the line's index within `content`.
///
/// Content of at most 10 lines is emitted in full. Longer content is reduced
/// to four lines starting at the first line that is neither blank nor begins
/// with one of the recognized comment prefixes (index 0 when no such line
/// exists). If more than three lines follow, a `"... (N more lines)"` marker
/// and the last three lines are appended, where `N` is the number of lines
/// actually hidden between the head and the tail; otherwise the remaining
/// lines are appended as they are. `_language` is currently unused.
fn get_code_preview_with_lines(content: &str, start_line: usize, _language: &str) -> String {
    const HEAD_LINES: usize = 4;
    const TAIL_LINES: usize = 3;
    const COMMENT_PREFIXES: [&str; 10] = [
        "//", "#", "/*", "*", "<!--", "--", "%", ";", "\"\"\"", "'''",
    ];

    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let numbered = |idx: usize| format!("{}: {}", start_line + idx, lines[idx]);

    if total <= 10 {
        return (0..total).map(&numbered).collect::<Vec<_>>().join("\n");
    }

    let first_code_line = lines
        .iter()
        .position(|line| {
            let trimmed = line.trim();
            !trimmed.is_empty()
                && !COMMENT_PREFIXES
                    .iter()
                    .any(|prefix| trimmed.starts_with(*prefix))
        })
        .unwrap_or(0);

    let head_end = (first_code_line + HEAD_LINES).min(total);
    let mut result: Vec<String> = (first_code_line..head_end).map(&numbered).collect();

    if head_end < total {
        let remaining = total - head_end;
        if remaining > TAIL_LINES {
            // The tail lines are printed right below the marker, so they
            // must not be counted as hidden.
            result.push(format!("... ({} more lines)", remaining - TAIL_LINES));
            result.extend((total - TAIL_LINES..total).map(&numbered));
        } else {
            result.extend((head_end..total).map(&numbered));
        }
    }

    result.join("\n")
}
/// Returns `path` relative to the current working directory when possible.
///
/// The path is canonicalized and the current directory is stripped from its
/// front. If the current directory cannot be determined, the path cannot be
/// canonicalized (for example because it does not exist), or it does not lie
/// under the current directory, `path` is returned unchanged.
fn ensure_relative_path(path: &str) -> String {
    let relative = std::env::current_dir().ok().and_then(|current_dir| {
        let absolute_path = std::path::Path::new(path).canonicalize().ok()?;
        absolute_path
            .strip_prefix(&current_dir)
            .ok()
            .map(|rel| rel.to_string_lossy().into_owned())
    });
    relative.unwrap_or_else(|| path.to_string())
}
/// Runs several queries at once, merges the hits and renders them as
/// Markdown.
///
/// The queries are embedded in one batch, searched via
/// `execute_parallel_searches_mcp`, merged by
/// `deduplicate_and_merge_results_mcp` (with
/// `1.0 - config.search.similarity_threshold` as the distance threshold) and
/// each result list is truncated to `max_results` before formatting
/// according to `mode` (`"code"`, `"text"`, `"docs"` or `"all"`).
/// `detail_level` only affects how code blocks are rendered.
///
/// Arguments are validated before the store is opened, so an invalid call
/// fails without touching storage.
///
/// # Errors
/// Fails when `queries` is empty or longer than
/// `octolib::embedding::constants::MAX_QUERIES`, when the store cannot be
/// opened, when embedding generation or searching fails, or when `mode` is
/// not one of the four supported values.
pub async fn search_codebase_with_details_multi_query(
    queries: &[String],
    mode: &str,
    detail_level: &str,
    max_results: usize,
    config: &Config,
) -> Result<String> {
    if queries.is_empty() {
        return Err(anyhow::anyhow!("At least one query is required"));
    }
    // Report the same constant that is enforced, so the message cannot
    // disagree with the actual limit.
    let max_queries = octolib::embedding::constants::MAX_QUERIES;
    if queries.len() > max_queries {
        return Err(anyhow::anyhow!(
            "Maximum {} queries allowed, got {}. Use fewer, more specific terms.",
            max_queries,
            queries.len()
        ));
    }

    let store = Store::new().await?;
    let embeddings = generate_batch_embeddings_for_queries_mcp(queries, mode, config).await?;
    let query_embeddings: Vec<_> = queries.iter().cloned().zip(embeddings).collect();

    let search_results =
        execute_parallel_searches_mcp(&store, query_embeddings, mode, max_results).await?;

    let distance_threshold = 1.0 - config.search.similarity_threshold;
    let (mut code_blocks, mut doc_blocks, mut text_blocks) =
        deduplicate_and_merge_results_mcp(search_results, queries, distance_threshold);
    code_blocks.truncate(max_results);
    doc_blocks.truncate(max_results);
    text_blocks.truncate(max_results);

    match mode {
        "code" => Ok(format_code_search_results_with_detail(
            &code_blocks,
            detail_level,
        )),
        "text" => Ok(format_text_search_results_as_markdown(&text_blocks)),
        "docs" => Ok(format_doc_search_results_as_markdown(&doc_blocks)),
        "all" => Ok(format_combined_search_results_with_detail(
            &code_blocks,
            &text_blocks,
            &doc_blocks,
            detail_level,
        )),
        _ => Err(anyhow::anyhow!(
            "Invalid search mode '{}'. Use 'all', 'code', 'docs', or 'text'.",
            mode
        )),
    }
}
/// Generates one embedding bundle per query for the requested search mode.
///
/// * `"code"` fills only `code_embeddings`.
/// * `"docs"` and `"text"` fill only `text_embeddings`.
/// * `"all"` fills both. When the configured code and text models are equal, a
///   single batch is generated and reused for both fields; otherwise the two
///   batches are requested concurrently.
///
/// # Errors
/// Returns an error for any other `mode`, or when embedding generation fails.
async fn generate_batch_embeddings_for_queries_mcp(
    queries: &[String],
    mode: &str,
    config: &Config,
) -> Result<Vec<crate::embedding::SearchModeEmbeddings>> {
    let mut bundles = Vec::new();

    if mode == "code" {
        let code_vectors = crate::embedding::generate_embeddings_batch(
            queries.to_vec(),
            true,
            config,
            crate::embedding::types::InputType::None,
        )
        .await?;
        for vector in code_vectors {
            bundles.push(crate::embedding::SearchModeEmbeddings {
                code_embeddings: Some(vector),
                text_embeddings: None,
            });
        }
        return Ok(bundles);
    }

    if mode == "docs" || mode == "text" {
        let text_vectors = crate::embedding::generate_embeddings_batch(
            queries.to_vec(),
            false,
            config,
            crate::embedding::types::InputType::Query,
        )
        .await?;
        for vector in text_vectors {
            bundles.push(crate::embedding::SearchModeEmbeddings {
                code_embeddings: None,
                text_embeddings: Some(vector),
            });
        }
        return Ok(bundles);
    }

    if mode != "all" {
        return Err(anyhow::anyhow!("Invalid search mode: {}", mode));
    }

    if config.embedding.code_model == config.embedding.text_model {
        // Same model for both kinds: embed once and share the vector.
        let shared_vectors = crate::embedding::generate_embeddings_batch(
            queries.to_vec(),
            true,
            config,
            crate::embedding::types::InputType::None,
        )
        .await?;
        for vector in shared_vectors {
            bundles.push(crate::embedding::SearchModeEmbeddings {
                code_embeddings: Some(vector.clone()),
                text_embeddings: Some(vector),
            });
        }
        return Ok(bundles);
    }

    // Different models: request both batches concurrently and pair them up by position.
    let (code_vectors, text_vectors) = tokio::try_join!(
        crate::embedding::generate_embeddings_batch(
            queries.to_vec(),
            true,
            config,
            crate::embedding::types::InputType::None
        ),
        crate::embedding::generate_embeddings_batch(
            queries.to_vec(),
            false,
            config,
            crate::embedding::types::InputType::Query
        )
    )?;
    for (code_vector, text_vector) in code_vectors.into_iter().zip(text_vectors.into_iter()) {
        bundles.push(crate::embedding::SearchModeEmbeddings {
            code_embeddings: Some(code_vector),
            text_embeddings: Some(text_vector),
        });
    }
    Ok(bundles)
}
/// Hits produced by one query on the MCP search path, grouped by block kind.
#[derive(Debug)]
struct QuerySearchResultMcp {
    /// Position of the originating query in the list passed to the parallel search.
    query_index: usize,
    /// Code hits for this query (empty when the mode does not search code).
    code_blocks: Vec<CodeBlock>,
    /// Documentation hits for this query (empty when the mode does not search docs).
    doc_blocks: Vec<crate::store::DocumentBlock>,
    /// Text hits for this query (empty when the mode does not search text).
    text_blocks: Vec<crate::store::TextBlock>,
}
/// Runs one search per `(query, embeddings)` pair concurrently.
///
/// Each query is limited to `(max_results * 2) / number_of_queries` hits (the
/// divisor is at least 1). Results are returned in query order; the first
/// failing search aborts the whole call with its error.
async fn execute_parallel_searches_mcp(
    store: &Store,
    query_embeddings: Vec<(String, crate::embedding::SearchModeEmbeddings)>,
    mode: &str,
    max_results: usize,
) -> Result<Vec<QuerySearchResultMcp>> {
    let query_count = query_embeddings.len().max(1);
    let per_query_limit = (max_results * 2) / query_count;

    let mut pending = Vec::new();
    for (position, (query_text, query_embedding)) in query_embeddings.into_iter().enumerate() {
        pending.push(async move {
            execute_single_search_with_embeddings_mcp(
                store,
                &query_text,
                query_embedding,
                mode,
                per_query_limit,
                position,
            )
            .await
        });
    }

    futures::future::try_join_all(pending).await
}
/// Executes the store lookups for a single query and reranks the hits.
///
/// * `"code"`: code blocks only, reranked and then TF-IDF boosted.
/// * `"docs"` / `"text"`: the respective block kind only, reranked.
/// * `"all"`: all three kinds are fetched concurrently; each list is reranked
///   and the code list is additionally TF-IDF boosted.
///
/// `limit` caps each store lookup and `query_index` is copied into the result
/// so hits can later be attributed to their query.
///
/// # Errors
/// Returns an error when the embeddings required by `mode` are missing, when a
/// store lookup fails, or when `mode` is not a supported value.
async fn execute_single_search_with_embeddings_mcp(
    store: &Store,
    query: &str,
    embeddings: crate::embedding::SearchModeEmbeddings,
    mode: &str,
    limit: usize,
    query_index: usize,
) -> Result<QuerySearchResultMcp> {
    // NOTE(review): the store is always queried with this fixed threshold here;
    // the configured similarity threshold is applied later when results are
    // merged. Presumably 1.01 means "do not filter at this stage"; confirm
    // against the store implementation.
    const STORE_DISTANCE_THRESHOLD: f32 = 1.01;

    let (code_blocks, doc_blocks, text_blocks) = match mode {
        "code" => {
            let code_embeddings = embeddings
                .code_embeddings
                .ok_or_else(|| anyhow::anyhow!("No code embeddings for code search"))?;
            let blocks = store
                .get_code_blocks_with_config(
                    code_embeddings,
                    Some(limit),
                    Some(STORE_DISTANCE_THRESHOLD),
                )
                .await?;
            let mut blocks = crate::reranker::Reranker::rerank_code_blocks(blocks, query);
            crate::reranker::Reranker::tf_idf_boost(&mut blocks, query);
            (blocks, vec![], vec![])
        }
        "docs" => {
            let text_embeddings = embeddings
                .text_embeddings
                .ok_or_else(|| anyhow::anyhow!("No text embeddings for docs search"))?;
            let blocks = store
                .get_document_blocks_with_config(
                    text_embeddings,
                    Some(limit),
                    Some(STORE_DISTANCE_THRESHOLD),
                )
                .await?;
            let blocks = crate::reranker::Reranker::rerank_document_blocks(blocks, query);
            (vec![], blocks, vec![])
        }
        "text" => {
            let text_embeddings = embeddings
                .text_embeddings
                .ok_or_else(|| anyhow::anyhow!("No text embeddings for text search"))?;
            let blocks = store
                .get_text_blocks_with_config(
                    text_embeddings,
                    Some(limit),
                    Some(STORE_DISTANCE_THRESHOLD),
                )
                .await?;
            let blocks = crate::reranker::Reranker::rerank_text_blocks(blocks, query);
            (vec![], vec![], blocks)
        }
        "all" => {
            let code_embeddings = embeddings
                .code_embeddings
                .ok_or_else(|| anyhow::anyhow!("No code embeddings for all search"))?;
            let text_embeddings = embeddings
                .text_embeddings
                .ok_or_else(|| anyhow::anyhow!("No text embeddings for all search"))?;
            // The three lookups are independent, so run them concurrently.
            let (code_blocks, doc_blocks, text_blocks) = tokio::try_join!(
                store.get_code_blocks_with_config(
                    code_embeddings,
                    Some(limit),
                    Some(STORE_DISTANCE_THRESHOLD)
                ),
                store.get_document_blocks_with_config(
                    text_embeddings.clone(),
                    Some(limit),
                    Some(STORE_DISTANCE_THRESHOLD)
                ),
                store.get_text_blocks_with_config(
                    text_embeddings,
                    Some(limit),
                    Some(STORE_DISTANCE_THRESHOLD)
                )
            )?;
            let mut code_blocks =
                crate::reranker::Reranker::rerank_code_blocks(code_blocks, query);
            let doc_blocks = crate::reranker::Reranker::rerank_document_blocks(doc_blocks, query);
            let text_blocks = crate::reranker::Reranker::rerank_text_blocks(text_blocks, query);
            crate::reranker::Reranker::tf_idf_boost(&mut code_blocks, query);
            (code_blocks, doc_blocks, text_blocks)
        }
        // Report unsupported modes as an error instead of panicking; this
        // function already returns `Result`.
        other => return Err(anyhow::anyhow!("Invalid search mode: {}", other)),
    };

    Ok(QuerySearchResultMcp {
        query_index,
        code_blocks,
        doc_blocks,
        text_blocks,
    })
}
fn deduplicate_and_merge_results_mcp(
search_results: Vec<QuerySearchResultMcp>,
queries: &[String],
distance_threshold: f32,
) -> (
Vec<CodeBlock>,
Vec<crate::store::DocumentBlock>,
Vec<crate::store::TextBlock>,
) {
use std::cmp::Ordering;
use std::collections::HashMap;
let mut code_map: HashMap<String, (CodeBlock, Vec<usize>)> = HashMap::new();
for result in &search_results {
for block in &result.code_blocks {
match code_map.entry(block.hash.clone()) {
std::collections::hash_map::Entry::Vacant(e) => {
e.insert((block.clone(), vec![result.query_index]));
}
std::collections::hash_map::Entry::Occupied(mut e) => {
let (existing_block, query_indices) = e.get_mut();
query_indices.push(result.query_index);
if block.distance < existing_block.distance {
*existing_block = block.clone();
}
}
}
}
}
let mut doc_map: HashMap<String, (crate::store::DocumentBlock, Vec<usize>)> = HashMap::new();
let mut text_map: HashMap<String, (crate::store::TextBlock, Vec<usize>)> = HashMap::new();
for result in &search_results {
for block in &result.doc_blocks {
match doc_map.entry(block.hash.clone()) {
std::collections::hash_map::Entry::Vacant(e) => {
e.insert((block.clone(), vec![result.query_index]));
}
std::collections::hash_map::Entry::Occupied(mut e) => {
let (existing_block, query_indices) = e.get_mut();
query_indices.push(result.query_index);
if block.distance < existing_block.distance {
*existing_block = block.clone();
}
}
}
}
for block in &result.text_blocks {
match text_map.entry(block.hash.clone()) {
std::collections::hash_map::Entry::Vacant(e) => {
e.insert((block.clone(), vec![result.query_index]));
}
std::collections::hash_map::Entry::Occupied(mut e) => {
let (existing_block, query_indices) = e.get_mut();
query_indices.push(result.query_index);
if block.distance < existing_block.distance {
*existing_block = block.clone();
}
}
}
}
}
let mut final_code_blocks: Vec<CodeBlock> = code_map
.into_values()
.map(|(mut block, query_indices)| {
apply_multi_query_bonus_code_mcp(&mut block, &query_indices, queries.len());
block
})
.filter(|block| {
if let Some(distance) = block.distance {
distance <= distance_threshold
} else {
true
}
})
.collect();
let mut final_doc_blocks: Vec<crate::store::DocumentBlock> = doc_map
.into_values()
.map(|(mut block, query_indices)| {
apply_multi_query_bonus_doc_mcp(&mut block, &query_indices, queries.len());
block
})
.filter(|block| {
if let Some(distance) = block.distance {
distance <= distance_threshold
} else {
true
}
})
.collect();
let mut final_text_blocks: Vec<crate::store::TextBlock> = text_map
.into_values()
.map(|(mut block, query_indices)| {
apply_multi_query_bonus_text_mcp(&mut block, &query_indices, queries.len());
block
})
.filter(|block| {
if let Some(distance) = block.distance {
distance <= distance_threshold
} else {
true
}
})
.collect();
final_code_blocks.sort_by(|a, b| match (a.distance, b.distance) {
(Some(dist_a), Some(dist_b)) => dist_a.partial_cmp(&dist_b).unwrap_or(Ordering::Equal),
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(None, None) => Ordering::Equal,
});
final_doc_blocks.sort_by(|a, b| match (a.distance, b.distance) {
(Some(dist_a), Some(dist_b)) => dist_a.partial_cmp(&dist_b).unwrap_or(Ordering::Equal),
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(None, None) => Ordering::Equal,
});
final_text_blocks.sort_by(|a, b| match (a.distance, b.distance) {
(Some(dist_a), Some(dist_b)) => dist_a.partial_cmp(&dist_b).unwrap_or(Ordering::Equal),
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(None, None) => Ordering::Equal,
});
(final_code_blocks, final_doc_blocks, final_text_blocks)
}
/// Rewards a code block that was returned by more than one query.
///
/// When both `query_indices` and `total_queries` exceed 1, the block's distance
/// (if present) is multiplied by `1.0 - min(coverage * 0.1, 0.2)`, where
/// `coverage = query_indices.len() / total_queries`. Otherwise the block is
/// left untouched.
fn apply_multi_query_bonus_code_mcp(
    block: &mut CodeBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    let matched_queries = query_indices.len();
    if matched_queries <= 1 || total_queries <= 1 {
        return;
    }

    let coverage_ratio = matched_queries as f32 / total_queries as f32;
    let reduction = (coverage_ratio * 0.1).min(0.2);
    let bonus_factor = 1.0 - reduction;
    block.distance = block.distance.map(|distance| distance * bonus_factor);
}
/// Rewards a documentation block that was returned by more than one query.
///
/// When both `query_indices` and `total_queries` exceed 1, the block's distance
/// (if present) is multiplied by `1.0 - min(coverage * 0.1, 0.2)`, where
/// `coverage = query_indices.len() / total_queries`. Otherwise the block is
/// left untouched.
fn apply_multi_query_bonus_doc_mcp(
    block: &mut crate::store::DocumentBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    let matched_queries = query_indices.len();
    if matched_queries <= 1 || total_queries <= 1 {
        return;
    }

    let coverage_ratio = matched_queries as f32 / total_queries as f32;
    let reduction = (coverage_ratio * 0.1).min(0.2);
    let bonus_factor = 1.0 - reduction;
    block.distance = block.distance.map(|distance| distance * bonus_factor);
}
/// Rewards a text block that was returned by more than one query.
///
/// When both `query_indices` and `total_queries` exceed 1, the block's distance
/// (if present) is multiplied by `1.0 - min(coverage * 0.1, 0.2)`, where
/// `coverage = query_indices.len() / total_queries`. Otherwise the block is
/// left untouched.
fn apply_multi_query_bonus_text_mcp(
    block: &mut crate::store::TextBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    let matched_queries = query_indices.len();
    if matched_queries <= 1 || total_queries <= 1 {
        return;
    }

    let coverage_ratio = matched_queries as f32 / total_queries as f32;
    let reduction = (coverage_ratio * 0.1).min(0.2);
    let bonus_factor = 1.0 - reduction;
    block.distance = block.distance.map(|distance| distance * bonus_factor);
}
/// Hits produced by one query, grouped by block kind.
#[derive(Debug, Clone)]
pub struct QuerySearchResult {
    /// Position of the originating query in the list passed to the parallel search.
    pub query_index: usize,
    /// Code hits for this query (empty when the mode does not search code).
    pub code_blocks: Vec<crate::store::CodeBlock>,
    /// Documentation hits for this query (empty when the mode does not search docs).
    pub doc_blocks: Vec<crate::store::DocumentBlock>,
    /// Text hits for this query (empty when the mode does not search text).
    pub text_blocks: Vec<crate::store::TextBlock>,
}
/// Generates one embedding bundle per query for the requested search mode.
///
/// * `"code"` fills only `code_embeddings`.
/// * `"docs"` and `"text"` fill only `text_embeddings`.
/// * `"all"` fills both. When the configured code and text models are equal, a
///   single batch is generated and reused for both fields; otherwise the two
///   batches are requested concurrently.
///
/// # Errors
/// Returns an error for any other `mode`, or when embedding generation fails.
pub async fn generate_batch_embeddings_for_queries(
    queries: &[String],
    mode: &str,
    config: &Config,
) -> Result<Vec<crate::embedding::SearchModeEmbeddings>> {
    let bundles: Vec<crate::embedding::SearchModeEmbeddings> = match mode {
        "code" => crate::embedding::generate_embeddings_batch(
            queries.to_vec(),
            true,
            config,
            crate::embedding::types::InputType::None,
        )
        .await?
        .into_iter()
        .map(|vector| crate::embedding::SearchModeEmbeddings {
            code_embeddings: Some(vector),
            text_embeddings: None,
        })
        .collect(),
        "docs" | "text" => crate::embedding::generate_embeddings_batch(
            queries.to_vec(),
            false,
            config,
            crate::embedding::types::InputType::Query,
        )
        .await?
        .into_iter()
        .map(|vector| crate::embedding::SearchModeEmbeddings {
            code_embeddings: None,
            text_embeddings: Some(vector),
        })
        .collect(),
        "all" if config.embedding.code_model == config.embedding.text_model => {
            // Same model for both kinds: embed once and share the vector.
            crate::embedding::generate_embeddings_batch(
                queries.to_vec(),
                true,
                config,
                crate::embedding::types::InputType::None,
            )
            .await?
            .into_iter()
            .map(|vector| crate::embedding::SearchModeEmbeddings {
                code_embeddings: Some(vector.clone()),
                text_embeddings: Some(vector),
            })
            .collect()
        }
        "all" => {
            // Different models: request both batches concurrently and pair them by position.
            let (code_vectors, text_vectors) = tokio::try_join!(
                crate::embedding::generate_embeddings_batch(
                    queries.to_vec(),
                    true,
                    config,
                    crate::embedding::types::InputType::None
                ),
                crate::embedding::generate_embeddings_batch(
                    queries.to_vec(),
                    false,
                    config,
                    crate::embedding::types::InputType::Query
                )
            )?;
            code_vectors
                .into_iter()
                .zip(text_vectors.into_iter())
                .map(
                    |(code_vector, text_vector)| crate::embedding::SearchModeEmbeddings {
                        code_embeddings: Some(code_vector),
                        text_embeddings: Some(text_vector),
                    },
                )
                .collect()
        }
        unknown => return Err(anyhow::anyhow!("Invalid search mode: {}", unknown)),
    };

    Ok(bundles)
}
/// Executes the store lookups for a single query.
///
/// The store is queried with a distance threshold of `1.0 - similarity_threshold`.
///
/// * `"code"`, `"docs"`, `"text"`: look up only that block kind, limited to
///   `per_query_limit`; a missing embedding yields an empty list.
/// * `"all"`: each kind is limited to `per_query_limit.div_ceil(3)`. Code is
///   looked up first, then text and docs concurrently.
///
/// `language_filter` is forwarded to the code lookup only.
///
/// # Errors
/// Returns an error when a store lookup fails or `mode` is not supported.
pub async fn execute_single_search_with_embeddings(
    store: &Store,
    embeddings: crate::embedding::SearchModeEmbeddings,
    mode: &str,
    per_query_limit: usize,
    query_index: usize,
    similarity_threshold: f32,
    language_filter: Option<&str>,
) -> Result<QuerySearchResult> {
    let max_distance = Some(1.0 - similarity_threshold);

    let (code_blocks, doc_blocks, text_blocks) = match mode {
        "code" => {
            let code_hits = match embeddings.code_embeddings {
                Some(vector) => {
                    store
                        .get_code_blocks_with_language_filter(
                            vector,
                            Some(per_query_limit),
                            max_distance,
                            language_filter,
                        )
                        .await?
                }
                None => Vec::new(),
            };
            (code_hits, Vec::new(), Vec::new())
        }
        "docs" => {
            let doc_hits = match embeddings.text_embeddings {
                Some(vector) => {
                    store
                        .get_document_blocks_with_config(
                            vector,
                            Some(per_query_limit),
                            max_distance,
                        )
                        .await?
                }
                None => Vec::new(),
            };
            (Vec::new(), doc_hits, Vec::new())
        }
        "text" => {
            let text_hits = match embeddings.text_embeddings {
                Some(vector) => {
                    store
                        .get_text_blocks_with_config(vector, Some(per_query_limit), max_distance)
                        .await?
                }
                None => Vec::new(),
            };
            (Vec::new(), Vec::new(), text_hits)
        }
        "all" => {
            // Split the per-query budget across the three block kinds.
            let per_type_limit = Some(per_query_limit.div_ceil(3));

            let code_hits = match embeddings.code_embeddings {
                Some(vector) => {
                    store
                        .get_code_blocks_with_language_filter(
                            vector,
                            per_type_limit,
                            max_distance,
                            language_filter,
                        )
                        .await?
                }
                None => Vec::new(),
            };

            let (text_hits, doc_hits) = match embeddings.text_embeddings {
                Some(vector) => {
                    let doc_vector = vector.clone();
                    tokio::try_join!(
                        store.get_text_blocks_with_config(vector, per_type_limit, max_distance),
                        store.get_document_blocks_with_config(
                            doc_vector,
                            per_type_limit,
                            max_distance
                        )
                    )?
                }
                None => (Vec::new(), Vec::new()),
            };

            (code_hits, doc_hits, text_hits)
        }
        _ => return Err(anyhow::anyhow!("Invalid search mode: {}", mode)),
    };

    Ok(QuerySearchResult {
        query_index,
        code_blocks,
        doc_blocks,
        text_blocks,
    })
}
/// Runs one search per query embedding concurrently.
///
/// Each query is limited to `(max_results * 2) / number_of_queries` hits (the
/// divisor is at least 1). The query text in each pair is not used here; only
/// its position is forwarded as the query index. Results are returned in query
/// order; the first failing search aborts the whole call with its error.
pub async fn execute_parallel_searches(
    store: &Store,
    query_embeddings: Vec<(String, crate::embedding::SearchModeEmbeddings)>,
    mode: &str,
    max_results: usize,
    similarity_threshold: f32,
    language_filter: Option<&str>,
) -> Result<Vec<QuerySearchResult>> {
    let query_count = query_embeddings.len().max(1);
    let per_query_limit = (max_results * 2) / query_count;

    let mut pending = Vec::new();
    for (position, (_, query_embedding)) in query_embeddings.into_iter().enumerate() {
        pending.push(async move {
            execute_single_search_with_embeddings(
                store,
                query_embedding,
                mode,
                per_query_limit,
                position,
                similarity_threshold,
                language_filter,
            )
            .await
        });
    }

    futures::future::try_join_all(pending).await
}
/// Rewards a code block that was returned by more than one query.
///
/// When both `query_indices` and `total_queries` exceed 1, the block's distance
/// (if present) is multiplied by `1.0 - min(coverage * 0.1, 0.2)`, where
/// `coverage = query_indices.len() / total_queries`. Otherwise the block is
/// left untouched.
pub fn apply_multi_query_bonus_code(
    block: &mut crate::store::CodeBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    let hit_by_several = query_indices.len() > 1;
    let several_queries = total_queries > 1;
    if !(hit_by_several && several_queries) {
        return;
    }

    let coverage_ratio = query_indices.len() as f32 / total_queries as f32;
    let bonus_factor = 1.0 - (coverage_ratio * 0.1).min(0.2);
    if let Some(distance) = block.distance.as_mut() {
        *distance *= bonus_factor;
    }
}
/// Rewards a documentation block that was returned by more than one query.
///
/// When both `query_indices` and `total_queries` exceed 1, the block's distance
/// (if present) is multiplied by `1.0 - min(coverage * 0.1, 0.2)`, where
/// `coverage = query_indices.len() / total_queries`. Otherwise the block is
/// left untouched.
pub fn apply_multi_query_bonus_doc(
    block: &mut crate::store::DocumentBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    let hit_by_several = query_indices.len() > 1;
    let several_queries = total_queries > 1;
    if !(hit_by_several && several_queries) {
        return;
    }

    let coverage_ratio = query_indices.len() as f32 / total_queries as f32;
    let bonus_factor = 1.0 - (coverage_ratio * 0.1).min(0.2);
    if let Some(distance) = block.distance.as_mut() {
        *distance *= bonus_factor;
    }
}
/// Rewards a text block that was returned by more than one query.
///
/// When both `query_indices` and `total_queries` exceed 1, the block's distance
/// (if present) is multiplied by `1.0 - min(coverage * 0.1, 0.2)`, where
/// `coverage = query_indices.len() / total_queries`. Otherwise the block is
/// left untouched.
pub fn apply_multi_query_bonus_text(
    block: &mut crate::store::TextBlock,
    query_indices: &[usize],
    total_queries: usize,
) {
    let hit_by_several = query_indices.len() > 1;
    let several_queries = total_queries > 1;
    if !(hit_by_several && several_queries) {
        return;
    }

    let coverage_ratio = query_indices.len() as f32 / total_queries as f32;
    let bonus_factor = 1.0 - (coverage_ratio * 0.1).min(0.2);
    if let Some(distance) = block.distance.as_mut() {
        *distance *= bonus_factor;
    }
}
pub fn deduplicate_and_merge_results(
search_results: Vec<QuerySearchResult>,
queries: &[String],
distance_threshold: f32,
) -> (
Vec<crate::store::CodeBlock>,
Vec<crate::store::DocumentBlock>,
Vec<crate::store::TextBlock>,
) {
use std::cmp::Ordering;
use std::collections::HashMap;
let mut code_map: HashMap<String, (crate::store::CodeBlock, Vec<usize>)> = HashMap::new();
for result in &search_results {
for block in &result.code_blocks {
match code_map.entry(block.hash.clone()) {
std::collections::hash_map::Entry::Vacant(e) => {
e.insert((block.clone(), vec![result.query_index]));
}
std::collections::hash_map::Entry::Occupied(mut e) => {
let (existing_block, query_indices) = e.get_mut();
query_indices.push(result.query_index);
if block.distance < existing_block.distance {
*existing_block = block.clone();
existing_block.start_line = block.start_line + 1;
existing_block.end_line = block.end_line + 1;
}
}
}
}
}
let mut doc_map: HashMap<String, (crate::store::DocumentBlock, Vec<usize>)> = HashMap::new();
for result in &search_results {
for block in &result.doc_blocks {
match doc_map.entry(block.hash.clone()) {
std::collections::hash_map::Entry::Vacant(e) => {
e.insert((block.clone(), vec![result.query_index]));
}
std::collections::hash_map::Entry::Occupied(mut e) => {
let (existing_block, query_indices) = e.get_mut();
query_indices.push(result.query_index);
if block.distance < existing_block.distance {
*existing_block = block.clone();
existing_block.start_line = block.start_line + 1;
existing_block.end_line = block.end_line + 1;
}
}
}
}
}
let mut text_map: HashMap<String, (crate::store::TextBlock, Vec<usize>)> = HashMap::new();
for result in &search_results {
for block in &result.text_blocks {
match text_map.entry(block.hash.clone()) {
std::collections::hash_map::Entry::Vacant(e) => {
e.insert((block.clone(), vec![result.query_index]));
}
std::collections::hash_map::Entry::Occupied(mut e) => {
let (existing_block, query_indices) = e.get_mut();
query_indices.push(result.query_index);
if block.distance < existing_block.distance {
*existing_block = block.clone();
existing_block.start_line = block.start_line + 1;
existing_block.end_line = block.end_line + 1;
}
}
}
}
}
let mut final_code_blocks: Vec<crate::store::CodeBlock> = code_map
.into_values()
.map(|(mut block, query_indices)| {
apply_multi_query_bonus_code(&mut block, &query_indices, queries.len());
block
})
.filter(|block| {
if let Some(distance) = block.distance {
distance <= distance_threshold
} else {
true
}
})
.collect();
let mut final_doc_blocks: Vec<crate::store::DocumentBlock> = doc_map
.into_values()
.map(|(mut block, query_indices)| {
apply_multi_query_bonus_doc(&mut block, &query_indices, queries.len());
block
})
.filter(|block| {
if let Some(distance) = block.distance {
distance <= distance_threshold
} else {
true
}
})
.collect();
let mut final_text_blocks: Vec<crate::store::TextBlock> = text_map
.into_values()
.map(|(mut block, query_indices)| {
apply_multi_query_bonus_text(&mut block, &query_indices, queries.len());
block
})
.filter(|block| {
if let Some(distance) = block.distance {
distance <= distance_threshold
} else {
true
}
})
.collect();
final_code_blocks.sort_by(|a, b| match (a.distance, b.distance) {
(Some(dist_a), Some(dist_b)) => dist_a.partial_cmp(&dist_b).unwrap_or(Ordering::Equal),
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(None, None) => Ordering::Equal,
});
final_doc_blocks.sort_by(|a, b| match (a.distance, b.distance) {
(Some(dist_a), Some(dist_b)) => dist_a.partial_cmp(&dist_b).unwrap_or(Ordering::Equal),
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(None, None) => Ordering::Equal,
});
final_text_blocks.sort_by(|a, b| match (a.distance, b.distance) {
(Some(dist_a), Some(dist_b)) => dist_a.partial_cmp(&dist_b).unwrap_or(Ordering::Equal),
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(None, None) => Ordering::Equal,
});
(final_code_blocks, final_doc_blocks, final_text_blocks)
}