use crate::tools::{self, ToolHandler};
use std::collections::HashMap;
use std::sync::LazyLock;
#[cfg(feature = "semantic")]
use crate::error::CodeLensError;
#[cfg(feature = "semantic")]
use crate::protocol::BackendKind;
#[cfg(feature = "semantic")]
use crate::tools::ToolResult;
#[cfg(feature = "semantic")]
use crate::AppState;
#[cfg(feature = "semantic")]
use serde_json::json;
/// Lazily-built lookup table from tool name to its handler function.
///
/// The table starts from whatever `tools::dispatch_table()` returns. When the
/// crate is compiled with the `semantic` feature, the embedding-backed
/// handlers defined in this file are inserted on top of it.
pub(crate) static DISPATCH_TABLE: LazyLock<HashMap<&'static str, ToolHandler>> =
    LazyLock::new(|| {
        let table = tools::dispatch_table();
        // Rebind as mutable only under the feature gate so that builds without
        // `semantic` do not carry an unused `mut` binding.
        #[cfg(feature = "semantic")]
        let mut table = table;
        #[cfg(feature = "semantic")]
        {
            // Every handler shares the same signature, so each is registered
            // directly by name rather than through a forwarding closure.
            table.insert("semantic_search", semantic_search_handler);
            table.insert("index_embeddings", index_embeddings_handler);
            table.insert("find_similar_code", find_similar_code_handler);
            table.insert("find_code_duplicates", find_code_duplicates_handler);
            table.insert("classify_symbol", classify_symbol_handler);
            table.insert("find_misplaced_code", find_misplaced_code_handler);
        }
        table
    });
/// Handles the `semantic_search` tool.
///
/// Steps, in order:
/// 1. Read `query` (required) and `max_results` (optional, default 20) from
///    `arguments`.
/// 2. Require an embedding engine whose index reports itself as indexed.
/// 3. Fetch semantic candidates and add a small bonus to those that also show
///    up in the symbol index's ranked context for the expanded query.
/// 4. Append hybrid-search hits that are not already present (keyed by file,
///    symbol name and line).
/// 5. Rerank through `rerank_semantic_matches`, passing `max_results`, and
///    attach a `provenance` object to every serialized result.
///
/// # Errors
/// Propagates the error from `tools::required_string` for `query`, fails when
/// no embedding engine is present, and returns
/// `CodeLensError::FeatureUnavailable` when the embedding index is empty.
#[cfg(feature = "semantic")]
fn semantic_search_handler(state: &AppState, arguments: &serde_json::Value) -> ToolResult {
    let query = tools::required_string(arguments, "query")?;
    let max_results = arguments
        .get("max_results")
        .and_then(|v| v.as_u64())
        .unwrap_or(20) as usize;
    let project = state.project();
    let guard = state.embedding_engine();
    let engine = guard.as_ref().ok_or_else(|| {
        anyhow::anyhow!("Embedding engine not available. Build with --features semantic")
    })?;
    if !engine.is_indexed() {
        return Err(CodeLensError::FeatureUnavailable(
            "Embedding index is empty. Call index_embeddings first to build the semantic index."
                .into(),
        ));
    }

    let query_analysis = crate::tools::query_analysis::analyze_retrieval_query(query);

    // "file:name" keys of symbols the structural ranker returns for the
    // expanded query; a failed lookup simply yields an empty set.
    let structural_names: std::collections::HashSet<String> = state
        .symbol_index()
        .get_ranked_context(&query_analysis.expanded_query, None, 4000, false, 2)
        .map(|rc| {
            rc.symbols
                .into_iter()
                .map(|s| format!("{}:{}", s.file, s.name))
                .collect()
        })
        .unwrap_or_default();

    // Over-fetch up to 4x the requested size, capped at 80, but never fewer
    // than `max_results` itself. This is spelled with `min`/`max` rather than
    // `clamp(max_results, 80)` because `Ord::clamp` panics when `min > max`,
    // which a caller could trigger with any `max_results` above 80.
    let candidate_limit = max_results.saturating_mul(4).min(80).max(max_results);

    let mut results =
        crate::tools::symbols::semantic_results_for_query(state, query, candidate_limit, false);

    // Reward semantic hits that the structural ranker also surfaced.
    for result in &mut results {
        let key = format!("{}:{}", result.file_path, result.symbol_name);
        if structural_names.contains(&key) {
            result.score += 0.06;
        }
    }

    {
        // Feed the (boosted) semantic scores into the hybrid search.
        let semantic_scores: std::collections::HashMap<String, f64> = results
            .iter()
            .map(|r| (format!("{}:{}", r.file_path, r.symbol_name), r.score))
            .collect();
        let hybrid = codelens_engine::search::search_symbols_hybrid_with_semantic(
            &project,
            &query_analysis.expanded_query,
            candidate_limit,
            0.7,
            Some(&semantic_scores),
        )
        .unwrap_or_default();

        // Only hybrid hits not already in `results` are appended; identity is
        // file + symbol name + line.
        let mut seen: std::collections::HashSet<String> = results
            .iter()
            .map(|r| format!("{}:{}:{}", r.file_path, r.symbol_name, r.line))
            .collect();
        for hr in hybrid {
            let key = format!("{}:{}:{}", hr.file, hr.name, hr.line);
            if seen.insert(key) {
                results.push(codelens_engine::SemanticMatch {
                    file_path: hr.file,
                    symbol_name: hr.name,
                    kind: hr.kind,
                    line: hr.line,
                    signature: hr.signature,
                    name_path: hr.name_path,
                    // NOTE(review): presumably the hybrid score is on a 0-100
                    // scale, hence the division; confirm against the engine.
                    score: (hr.score / 100.0) * 0.35,
                });
            }
        }
    }

    results = crate::tools::query_analysis::rerank_semantic_matches(query, results, max_results);

    // Per-result (prior_delta, adjusted_score), each rounded to three
    // decimals, computed before `results` is serialized into the payload.
    let result_scores = results
        .iter()
        .map(|result| {
            let (prior_delta, adjusted_score) =
                crate::tools::query_analysis::semantic_adjusted_score_parts(query, result);
            (
                (prior_delta * 1000.0).round() / 1000.0,
                (adjusted_score * 1000.0).round() / 1000.0,
            )
        })
        .collect::<Vec<_>>();

    let mut payload = json!({
        "query": query,
        "results": results,
        "count": results.len(),
        "retrieval": {
            "semantic_enabled": true,
            "requested_query": query,
            "semantic_query": query_analysis.semantic_query,
        }
    });

    // Decorate each serialized result object with its provenance record.
    if let Some(entries) = payload
        .get_mut("results")
        .and_then(serde_json::Value::as_array_mut)
    {
        for (idx, entry) in entries.iter_mut().enumerate() {
            if let Some(map) = entry.as_object_mut() {
                let (prior_delta, adjusted_score) =
                    result_scores.get(idx).copied().unwrap_or((0.0, 0.0));
                map.insert(
                    "provenance".to_owned(),
                    json!({
                        "source": "semantic",
                        "retrieval_rank": idx + 1,
                        "prior_delta": prior_delta,
                        "adjusted_score": adjusted_score,
                    }),
                );
            }
        }
    }

    Ok((payload, tools::success_meta(BackendKind::Semantic, 0.85)))
}
/// Handles the `index_embeddings` tool.
///
/// Builds the embedding index for the current project and then, on a
/// best-effort basis, writes any generated bridge candidates to
/// `.codelens/bridges.json` under the project path. Filesystem and
/// serialization failures during that write are ignored. A failure while
/// generating candidates is reported as zero bridges.
///
/// # Errors
/// Fails when no embedding engine is present or when indexing itself fails.
#[cfg(feature = "semantic")]
fn index_embeddings_handler(state: &AppState, _arguments: &serde_json::Value) -> ToolResult {
    let project = state.project();
    let engine_slot = state.embedding_engine();
    let Some(engine) = engine_slot.as_ref() else {
        return Err(anyhow::anyhow!("Embedding engine not available").into());
    };

    let indexed_symbols = engine.index_from_project(&project)?;

    // An error or an empty candidate list both collapse to a count of zero.
    let bridges_generated = engine
        .generate_bridge_candidates(&project)
        .ok()
        .filter(|candidates| !candidates.is_empty())
        .map_or(0, |candidates| {
            let out_dir = project.as_path().join(".codelens");
            let _ = std::fs::create_dir_all(&out_dir);
            let entries = candidates
                .iter()
                .map(|(nl, code)| json!({"nl": nl, "code": code}))
                .collect::<Vec<serde_json::Value>>();
            let serialized = serde_json::to_string_pretty(&entries).unwrap_or_default();
            let _ = std::fs::write(out_dir.join("bridges.json"), serialized);
            candidates.len()
        });

    let body = json!({"indexed_symbols": indexed_symbols, "bridges_generated": bridges_generated, "status": "ok"});
    Ok((body, tools::success_meta(BackendKind::Semantic, 0.95)))
}
/// Handles the `find_similar_code` tool.
///
/// Reads `file_path` and `symbol_name` (both required), `max_results`
/// (optional, default 10) and `min_similarity` (optional, default 0.3) from
/// `arguments`. It asks the embedding engine for code similar to the given
/// symbol, drops hits scoring below `min_similarity`, and keeps at most
/// `max_results` of the remainder.
///
/// # Errors
/// Propagates errors from `tools::required_string`, fails when no embedding
/// engine is present, and propagates any error from the engine lookup.
#[cfg(feature = "semantic")]
fn find_similar_code_handler(state: &AppState, arguments: &serde_json::Value) -> ToolResult {
    let file_path = tools::required_string(arguments, "file_path")?;
    let symbol_name = tools::required_string(arguments, "symbol_name")?;
    let max_results = tools::optional_usize(arguments, "max_results", 10);
    let min_similarity = arguments
        .get("min_similarity")
        .and_then(|v| v.as_f64())
        .unwrap_or(0.3);

    let guard = state.embedding_engine();
    let engine = guard
        .as_ref()
        .ok_or_else(|| anyhow::anyhow!("Embedding engine not available"))?;

    // Over-fetch up to 2x (capped at 40) so the similarity filter below still
    // has enough candidates, but never fetch fewer than `max_results`. This is
    // written with `min`/`max` because `clamp(max_results, 40)` would panic
    // for any `max_results` above 40 (`Ord::clamp` requires `min <= max`).
    let fetch_limit = max_results.saturating_mul(2).min(40).max(max_results);

    let raw_results = engine.find_similar_code(file_path, symbol_name, fetch_limit)?;
    let results: Vec<_> = raw_results
        .into_iter()
        .filter(|r| r.score >= min_similarity)
        .take(max_results)
        .collect();

    Ok((
        json!({
            "query_symbol": symbol_name,
            "file": file_path,
            "min_similarity": min_similarity,
            "similar": results,
            "count": results.len()
        }),
        tools::success_meta(BackendKind::Semantic, 0.80),
    ))
}
/// Handles the `find_code_duplicates` tool.
///
/// Reads `threshold` (optional, default 0.85) and `max_pairs` (optional,
/// default 20) from `arguments` and forwards them to the embedding engine's
/// duplicate finder. The response echoes the threshold alongside the pairs
/// and their count.
///
/// # Errors
/// Fails when no embedding engine is present, and propagates any error from
/// the engine.
#[cfg(feature = "semantic")]
fn find_code_duplicates_handler(state: &AppState, arguments: &serde_json::Value) -> ToolResult {
    let threshold = arguments
        .get("threshold")
        .and_then(serde_json::Value::as_f64)
        .unwrap_or(0.85);
    let max_pairs = arguments
        .get("max_pairs")
        .and_then(serde_json::Value::as_u64)
        .map_or(20, |n| n as usize);

    let engine_slot = state.embedding_engine();
    let Some(engine) = engine_slot.as_ref() else {
        return Err(anyhow::anyhow!("Embedding engine not available").into());
    };

    let duplicates = engine.find_duplicates(threshold, max_pairs)?;
    let count = duplicates.len();
    let body = json!({"threshold": threshold, "duplicates": duplicates, "count": count});
    Ok((body, tools::success_meta(BackendKind::Semantic, 0.80)))
}
/// Handles the `classify_symbol` tool.
///
/// Reads `file_path`, `symbol_name` and the `categories` array from
/// `arguments`, then asks the embedding engine to score the symbol against
/// those categories. Entries of `categories` that are not JSON strings are
/// skipped.
///
/// # Errors
/// Propagates errors from `tools::required_string`, returns
/// `CodeLensError::MissingParam("categories")` when `categories` is absent or
/// not an array, fails when no embedding engine is present, and propagates any
/// error from the engine.
#[cfg(feature = "semantic")]
fn classify_symbol_handler(state: &AppState, arguments: &serde_json::Value) -> ToolResult {
    let file_path = tools::required_string(arguments, "file_path")?;
    let symbol_name = tools::required_string(arguments, "symbol_name")?;
    let Some(categories) = arguments
        .get("categories")
        .and_then(serde_json::Value::as_array)
    else {
        return Err(CodeLensError::MissingParam("categories".into()));
    };
    let labels = categories
        .iter()
        .filter_map(serde_json::Value::as_str)
        .collect::<Vec<&str>>();

    let engine_slot = state.embedding_engine();
    let Some(engine) = engine_slot.as_ref() else {
        return Err(anyhow::anyhow!("Embedding engine not available").into());
    };

    let classifications = engine.classify_symbol(file_path, symbol_name, &labels)?;
    let body = json!({"symbol": symbol_name, "file": file_path, "classifications": classifications});
    Ok((body, tools::success_meta(BackendKind::Semantic, 0.75)))
}
/// Handles the `find_misplaced_code` tool.
///
/// Reads `max_results` (optional, default 10) from `arguments` and returns
/// the outliers reported by the embedding engine together with their count.
///
/// # Errors
/// Fails when no embedding engine is present, and propagates any error from
/// the engine.
#[cfg(feature = "semantic")]
fn find_misplaced_code_handler(state: &AppState, arguments: &serde_json::Value) -> ToolResult {
    let limit = arguments
        .get("max_results")
        .and_then(serde_json::Value::as_u64)
        .map_or(10, |n| n as usize);

    let engine_slot = state.embedding_engine();
    let Some(engine) = engine_slot.as_ref() else {
        return Err(anyhow::anyhow!("Embedding engine not available").into());
    };

    let outliers = engine.find_misplaced_code(limit)?;
    let count = outliers.len();
    let body = json!({"outliers": outliers, "count": count});
    Ok((body, tools::success_meta(BackendKind::Semantic, 0.70)))
}
#[cfg(all(test, feature = "semantic"))]
mod tests {
    use crate::tools::query_analysis::{analyze_retrieval_query, rerank_semantic_matches};
    use codelens_engine::SemanticMatch;

    /// Builds a `SemanticMatch` fixture: line 1, empty signature, and a
    /// `name_path` equal to the symbol name.
    fn semantic_match(file_path: &str, symbol_name: &str, kind: &str, score: f64) -> SemanticMatch {
        let name = String::from(symbol_name);
        SemanticMatch {
            file_path: String::from(file_path),
            symbol_name: name.clone(),
            kind: String::from(kind),
            line: 1,
            signature: String::new(),
            name_path: name,
            score,
        }
    }

    /// Reranks `candidates`, given as `(file_path, symbol_name, kind, score)`
    /// tuples, for `query` with a limit of 3, and returns the symbol name that
    /// ends up first.
    fn top_ranked_symbol(query: &str, candidates: &[(&str, &str, &str, f64)]) -> String {
        let matches = candidates
            .iter()
            .map(|&(file, name, kind, score)| semantic_match(file, name, kind, score))
            .collect::<Vec<_>>();
        let reranked = rerank_semantic_matches(query, matches, 3);
        reranked[0].symbol_name.clone()
    }

    #[test]
    fn prefers_extract_entrypoint_over_script_variables() {
        let winner = top_ranked_symbol(
            "extract lines of code into a new function",
            &[
                (
                    "scripts/finetune/build_codex_dataset.py",
                    "line",
                    "variable",
                    0.233,
                ),
                (
                    "benchmarks/harness/task-bootstrap.py",
                    "lines",
                    "variable",
                    0.219,
                ),
                (
                    "crates/codelens-mcp/src/tools/composite.rs",
                    "refactor_extract_function",
                    "function",
                    0.184,
                ),
            ],
        );
        assert_eq!(winner, "refactor_extract_function");
    }

    #[test]
    fn prefers_dispatch_entrypoint_over_handler_types() {
        let winner = top_ranked_symbol(
            "route an incoming tool request to the right handler",
            &[
                (
                    "crates/codelens-mcp/src/tools/mod.rs",
                    "ToolHandler",
                    "unknown",
                    0.313,
                ),
                (
                    "benchmarks/harness/harness_runner_common.py",
                    "tool_list",
                    "variable",
                    0.266,
                ),
                (
                    "crates/codelens-mcp/src/dispatch.rs",
                    "dispatch_tool",
                    "function",
                    0.224,
                ),
            ],
        );
        assert_eq!(winner, "dispatch_tool");
    }

    #[test]
    fn prefers_stdio_entrypoint_over_generic_read_helpers() {
        let winner = top_ranked_symbol(
            "read input from stdin line by line run_stdio stdio stdin",
            &[
                (
                    "crates/codelens-core/src/file_ops/mod.rs",
                    "read_line_at",
                    "function",
                    0.261,
                ),
                (
                    "crates/codelens-core/src/file_ops/reader.rs",
                    "read_file",
                    "function",
                    0.258,
                ),
                (
                    "crates/codelens-mcp/src/server/transport_stdio.rs",
                    "run_stdio",
                    "function",
                    0.148,
                ),
            ],
        );
        assert_eq!(winner, "run_stdio");
    }

    #[test]
    fn prefers_mutation_gate_entrypoint_over_telemetry_helpers() {
        let winner = top_ranked_symbol(
            "mutation gate preflight check before editing evaluate_mutation_gate mutation_gate preflight",
            &[
                (
                    "crates/codelens-mcp/src/telemetry.rs",
                    "record_mutation_preflight_checked",
                    "function",
                    0.402,
                ),
                (
                    "crates/codelens-mcp/src/telemetry.rs",
                    "record_mutation_preflight_gate_denied",
                    "function",
                    0.314,
                ),
                (
                    "crates/codelens-mcp/src/mutation_gate.rs",
                    "evaluate_mutation_gate",
                    "function",
                    0.280,
                ),
            ],
        );
        assert_eq!(winner, "evaluate_mutation_gate");
    }

    #[test]
    fn expands_stdio_alias_terms() {
        let analysis = analyze_retrieval_query("read input from stdin line by line");
        let expanded = analysis.expanded_query;
        assert!(expanded.contains("run_stdio"));
        assert!(expanded.contains("stdio"));
    }
}