use super::parser::slice_source;
use super::scoring::score_symbol_with_lower;
use super::types::{RankedContextEntry, SymbolInfo, SymbolKind};
use std::collections::HashMap;
use std::path::Path;
/// Per-signal blending weights used by `rank_symbols`.
///
/// Each signal is normalized to roughly 0..=100, multiplied by its weight,
/// and the products are summed into the blended score.
pub(crate) struct RankWeights {
    /// Weight of the lexical/text match score.
    pub text: f64,
    /// Weight of the per-file pagerank signal.
    pub pagerank: f64,
    /// Weight of the per-file recency signal.
    pub recency: f64,
    /// Weight of the semantic (embedding) similarity signal.
    pub semantic: f64,
    /// Weight of the LSP reference-proximity signal; 0.0 disables it
    /// (including its rescue path in `rank_symbols`).
    pub lsp_signal: f64,
}
impl Default for RankWeights {
    /// Balanced default mix (the four active weights sum to 1.0) with the
    /// LSP signal disabled.
    fn default() -> Self {
        Self {
            text: 0.55,
            pagerank: 0.15,
            recency: 0.10,
            semantic: 0.20,
            lsp_signal: 0.0,
        }
    }
}
/// All ranking inputs for one query: per-file signal maps plus the weight mix.
pub(crate) struct RankingContext {
    /// File path -> pagerank score (rescaled and capped in `rank_symbols`).
    pub pagerank: HashMap<String, f64>,
    /// File path -> recency signal (scaled by 100 and capped when blended).
    pub recent_files: HashMap<String, f64>,
    /// `"file_path:name"` -> semantic similarity score (see the key built in
    /// `rank_symbols`).
    pub semantic_scores: HashMap<String, f64>,
    /// File path -> LSP reference line numbers; `lsp_proximity_factor`
    /// binary-searches these, so they are expected in ascending order.
    pub lsp_boost_refs: HashMap<String, Vec<usize>>,
    /// The signal weight mix to blend with.
    pub weights: RankWeights,
}
/// How far past a symbol's span a reference may sit and still contribute.
pub(crate) const LSP_PROXIMITY_WINDOW_LINES: usize = 250;
/// Number of in-span references at which the containment factor saturates at 1.0.
pub(crate) const LSP_CONTAINMENT_SATURATION_REFS: usize = 3;
/// Ceiling on the factor for references outside (below) the symbol's span.
pub(crate) const LSP_OUTSIDE_SPAN_MAX_FACTOR: f64 = 0.30;

/// Converts LSP reference lines into a 0.0..=1.0 proximity factor for a
/// symbol spanning `symbol_line..=symbol_end_line`.
///
/// `ref_lines` must be sorted ascending (this uses binary search). References
/// inside the span score by count, saturating at
/// `LSP_CONTAINMENT_SATURATION_REFS`; the nearest reference after the span
/// scores by linear falloff over `LSP_PROXIMITY_WINDOW_LINES`, capped at
/// `LSP_OUTSIDE_SPAN_MAX_FACTOR`. References strictly before the span are
/// ignored.
pub(crate) fn lsp_proximity_factor(
    ref_lines: &[usize],
    symbol_line: usize,
    symbol_end_line: usize,
) -> f64 {
    // Guard against an unset end line (0) by treating the span end as at
    // least the start line.
    let span_end = symbol_end_line.max(symbol_line);
    // Index of the first reference at or after the symbol's start line.
    let first_idx = ref_lines.partition_point(|&line| line < symbol_line);
    match ref_lines.get(first_idx) {
        // No references at or past the start (covers the empty slice too).
        None => 0.0,
        // Containment: count refs inside the span, saturating the factor.
        Some(&line) if line <= span_end => {
            let inside = ref_lines.partition_point(|&l| l <= span_end) - first_idx;
            (inside as f64 / LSP_CONTAINMENT_SATURATION_REFS as f64).min(1.0)
        }
        // Nearest ref is past the span: linear falloff within the window.
        Some(&line) => {
            let gap = line - span_end;
            if gap > LSP_PROXIMITY_WINDOW_LINES {
                0.0
            } else {
                LSP_OUTSIDE_SPAN_MAX_FACTOR
                    * (1.0 - gap as f64 / LSP_PROXIMITY_WINDOW_LINES as f64)
            }
        }
    }
}
impl RankingContext {
    /// Context driven by text plus graph centrality only; semantic and LSP
    /// signals are disabled.
    pub fn with_pagerank(pagerank: HashMap<String, f64>) -> Self {
        let weights = RankWeights {
            text: 0.70,
            pagerank: 0.20,
            recency: 0.10,
            semantic: 0.0,
            lsp_signal: 0.0,
        };
        Self {
            pagerank,
            recent_files: HashMap::new(),
            semantic_scores: HashMap::new(),
            lsp_boost_refs: HashMap::new(),
            weights,
        }
    }

    /// Context with graph and semantic signals; the weight mix is derived
    /// from the query's shape and the number of semantic hits.
    pub fn with_pagerank_and_semantic(
        query: &str,
        pagerank: HashMap<String, f64>,
        semantic_scores: HashMap<String, f64>,
    ) -> Self {
        let weights = auto_weights_with_semantic_count(query, semantic_scores.len());
        Self {
            pagerank,
            recent_files: HashMap::new(),
            semantic_scores,
            lsp_boost_refs: HashMap::new(),
            weights,
        }
    }

    /// Context where only the lexical score contributes.
    pub fn text_only() -> Self {
        let weights = RankWeights {
            text: 1.0,
            pagerank: 0.0,
            recency: 0.0,
            semantic: 0.0,
            lsp_signal: 0.0,
        };
        Self {
            pagerank: HashMap::new(),
            recent_files: HashMap::new(),
            semantic_scores: HashMap::new(),
            lsp_boost_refs: HashMap::new(),
            weights,
        }
    }
}
/// Derives a weight mix from the query's shape and the richness of the
/// semantic result set.
///
/// Identifier-looking queries (single word with an underscore or any
/// uppercase letter, <= 30 bytes) stay text-heavy; long multi-word queries
/// lean on semantics when at least five semantic hits exist; everything else
/// gets a middling mix. The LSP weight is always 0.0 here.
fn auto_weights_with_semantic_count(query: &str, semantic_count: usize) -> RankWeights {
    let word_count = query.split_whitespace().count();
    let single_word = word_count <= 1;
    // Underscore or camelCase in a short single token reads as an identifier.
    let looks_like_identifier = single_word
        && (query.contains('_') || query.chars().any(|c| c.is_uppercase()))
        && query.len() <= 30;
    let rich_semantic = semantic_count >= 5;
    if looks_like_identifier {
        return RankWeights {
            text: 0.65,
            pagerank: 0.10,
            recency: 0.05,
            semantic: if rich_semantic { 0.20 } else { 0.10 },
            lsp_signal: 0.0,
        };
    }
    let (text, pagerank, recency, semantic) = if word_count >= 4 {
        // Long natural-language query: let semantics dominate when available.
        if rich_semantic {
            (0.20, 0.05, 0.05, 0.70)
        } else {
            (0.60, 0.20, 0.10, 0.10)
        }
    } else if rich_semantic {
        (0.50, 0.10, 0.10, 0.30)
    } else {
        (0.60, 0.15, 0.10, 0.15)
    };
    RankWeights {
        text,
        pagerank,
        recency,
        semantic,
        lsp_signal: 0.0,
    }
}
/// A query with four or more whitespace-separated words is treated as
/// natural language rather than an identifier or short phrase.
fn is_natural_language_query(query_lower: &str) -> bool {
    query_lower.split_whitespace().nth(3).is_some()
}
/// True when the query asks for an entrypoint/handler-style implementation.
///
/// "handler" counts only as a whole-word-ish hit: either at the start of the
/// query ("handler ...") or preceded by a space (" handler").
fn query_targets_entrypoint_impl(query_lower: &str) -> bool {
    if query_lower.starts_with("handler ") {
        return true;
    }
    ["entrypoint", " handler", "primary implementation"]
        .iter()
        .any(|needle| query_lower.contains(needle))
}
/// True when the query asks for a helper implementation.
///
/// The original also tested `contains("internal helper")`, but any string
/// containing "internal helper" necessarily contains "helper", so that
/// clause was dead; a single substring test is behaviorally identical.
fn query_targets_helper_impl(query_lower: &str) -> bool {
    query_lower.contains("helper")
}
/// True when the query asks about builders or construction logic.
///
/// Note the trailing space in "build " and leading space in " construction"
/// keep bare "rebuilds"/"constructions"-style false positives down.
fn query_targets_builder_impl(query_lower: &str) -> bool {
    ["builder", "build ", " construction"]
        .iter()
        .any(|needle| query_lower.contains(needle))
}
/// True when the query contains any of the given substrings.
fn mentions_any(query_lower: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if query_lower.contains(needle) {
            return true;
        }
    }
    false
}
/// Hand-tuned additive prior based on the symbol's kind, name, and file path.
///
/// Applies only to natural-language queries (4+ words) or entrypoint-style
/// queries; everything else returns a neutral 0.0 so lexical scoring
/// dominates. The result is added directly to the blended score in
/// `rank_symbols`, so magnitudes share that ~0..=100 scale. May be negative.
fn symbol_kind_prior(query_lower: &str, symbol: &SymbolInfo) -> f64 {
    let entrypoint_query = query_targets_entrypoint_impl(query_lower);
    // Identifier-style and short-phrase queries skip kind priors entirely.
    if !is_natural_language_query(query_lower) && !entrypoint_query {
        return 0.0;
    }
    // Exact phrases that pin well-known targets; used both to boost the
    // intended symbol and to suppress near-miss competitors below.
    let exact_find_all_word_matches = query_lower.contains("find all word matches");
    let exact_find_word_matches_in_files = query_lower.contains("find word matches in files");
    let exact_build_embedding_text = query_targets_builder_impl(query_lower)
        && query_lower.contains("embedding")
        && query_lower.contains("text");
    // Verb-like queries usually seek callable code, not type definitions.
    let is_action_query = mentions_any(
        query_lower,
        &[
            "rename",
            "find",
            "search",
            "inline",
            "start",
            "read",
            "parse",
            "build",
            "watch",
            "extract",
            "route",
            "change",
            "move",
            "apply",
            "categorize",
            "get",
            "skip",
        ],
    );
    // Queries explicitly about files should not penalize File symbols.
    let wants_fileish = mentions_any(
        query_lower,
        &["file", "files", "project structure", "key files"],
    );
    let mut prior = 0.0;
    if is_action_query {
        // Favor functions/methods for action verbs; demote type-ish kinds.
        prior += match symbol.kind {
            SymbolKind::Function | SymbolKind::Method => 12.0,
            SymbolKind::Module => 8.0,
            SymbolKind::File => {
                if wants_fileish {
                    8.0
                } else {
                    -4.0
                }
            }
            SymbolKind::Class
            | SymbolKind::Interface
            | SymbolKind::Enum
            | SymbolKind::TypeAlias => -6.0,
            SymbolKind::Variable | SymbolKind::Property => -2.0,
            SymbolKind::Unknown => 0.0,
        };
    }
    if entrypoint_query {
        // Entrypoint queries want the handler function, not its data types.
        prior += match symbol.kind {
            SymbolKind::Function | SymbolKind::Method => 10.0,
            SymbolKind::Class
            | SymbolKind::Interface
            | SymbolKind::Enum
            | SymbolKind::TypeAlias => -8.0,
            _ => 0.0,
        };
        // Edit/Result/Error types often shadow the handler by name.
        if symbol.name.ends_with("Edit")
            || symbol.name.ends_with("Result")
            || symbol.name.ends_with("Error")
        {
            prior -= 6.0;
        }
    }
    // Test code is rarely the sought implementation.
    if symbol.name.starts_with("test_") || symbol.name_path.starts_with("tests/") {
        prior -= 10.0;
    }
    // "Who implements X"-style queries defer part of the prior to the
    // symbol's provenance.
    let is_impl_query = query_lower.contains("implementation")
        || query_lower.contains("handler")
        || query_lower.contains("helper")
        || query_lower.contains("entrypoint")
        || query_lower.contains("primary")
        || query_lower.contains("responsible");
    if is_impl_query {
        prior += symbol.provenance.impl_query_prior();
    }
    // Domain-keyword -> file affinity boosts.
    if query_lower.contains("http") && symbol.file_path.contains("transport_http") {
        prior += 12.0;
    }
    if query_lower.contains("stdin") && symbol.file_path.contains("transport_stdio") {
        prior += 12.0;
    }
    if query_lower.contains("watch") && symbol.file_path.contains("watcher") {
        prior += 12.0;
    }
    if query_lower.contains("embedding") && symbol.file_path.contains("embedding") {
        prior += 10.0;
    }
    if query_lower.contains("project structure") && symbol.file_path.contains("tools/composite") {
        prior += 10.0;
    }
    if query_lower.contains("dispatch") && symbol.file_path.contains("dispatch.rs") {
        prior += 10.0;
    }
    // Pinpoint boosts for specific known targets.
    if query_lower.contains("inline")
        && entrypoint_query
        && symbol.name == "inline_function"
        && symbol.file_path.contains("/inline.rs")
    {
        prior += 18.0;
    }
    // Generic "find" helper queries point at find_symbol — unless the query
    // names one of the rename.rs word-match functions exactly.
    if query_lower.contains("find")
        && query_targets_helper_impl(query_lower)
        && !exact_find_all_word_matches
        && !exact_find_word_matches_in_files
        && symbol.name == "find_symbol"
        && symbol.file_path.contains("symbols/mod.rs")
    {
        prior += 18.0;
    }
    // "build embedding text" queries: boost the exact target and demote its
    // sibling embedding helpers in the same module.
    if exact_build_embedding_text && symbol.file_path.contains("embedding/mod.rs") {
        if symbol.name == "build_embedding_text" {
            prior += 22.0;
        } else if symbol.name.starts_with("build_")
            || symbol.name.starts_with("get_")
            || symbol.name.starts_with("embed_")
            || symbol.name.starts_with("embeddings_")
            || symbol.name.starts_with("embedding_")
            || symbol.name == "EmbeddingEngine"
            || symbol.name.contains("embedding")
        {
            prior -= 10.0;
        }
    }
    if query_lower.contains("insert batch")
        && symbol.name == "insert_batch"
        && symbol.file_path.contains("embedding/vec_store.rs")
    {
        prior += 18.0;
    }
    if (query_lower.contains("parser") || query_lower.contains("ast"))
        && symbol.file_path.contains("symbols/parser.rs")
    {
        prior += 10.0;
    }
    // Word-match queries: exact-phrase hits pick one rename.rs function and
    // demote the other; fuzzier phrasings boost both, favoring the broader.
    if (exact_find_all_word_matches || exact_find_word_matches_in_files)
        && symbol.file_path.contains("rename.rs")
    {
        match symbol.name.as_str() {
            "find_all_word_matches" if exact_find_all_word_matches => prior += 24.0,
            "find_word_matches_in_files" if exact_find_word_matches_in_files => prior += 24.0,
            "find_all_word_matches" | "find_word_matches_in_files" => prior -= 10.0,
            _ => {}
        }
    } else if (query_lower.contains("word match")
        || query_lower.contains("word_match")
        || query_lower.contains("all occurrences")
        || query_lower.contains("grep all")
        || (query_lower.contains("find") && query_lower.contains("match")))
        && symbol.file_path.contains("rename.rs")
    {
        if symbol.name == "find_all_word_matches" {
            prior += 18.0;
        } else if symbol.name == "find_word_matches_in_files" {
            prior += 14.0;
        }
    }
    // Exact word-match queries should not be hijacked by find_symbol.
    if (exact_find_all_word_matches || exact_find_word_matches_in_files)
        && symbol.name == "find_symbol"
        && symbol.file_path.contains("symbols/mod.rs")
    {
        prior -= 12.0;
    }
    prior
}
/// Additive file-path prior for natural-language / entrypoint-style queries.
///
/// Boosts paths under `crates/` and keyword->file domain affinities; demotes
/// non-source trees (benchmarks, models, docs) and test files. Other query
/// shapes get a neutral 0.0.
fn file_path_prior(query_lower: &str, file_path: &str) -> f64 {
    let heuristic_query =
        is_natural_language_query(query_lower) || query_targets_entrypoint_impl(query_lower);
    if !heuristic_query {
        return 0.0;
    }
    // Workspace source gets a small baseline lift.
    let mut prior = if file_path.starts_with("crates/") {
        8.0
    } else {
        0.0
    };
    // (query keywords, file-path fragment, boost) — multiple rows may fire
    // and their boosts accumulate.
    let domain_affinities: &[(&[&str], &str, f64)] = &[
        (
            &[
                "call graph",
                "call_graph",
                "callers",
                "callees",
                "extract calls",
                "candidate files",
            ],
            "call_graph.rs",
            14.0,
        ),
        (
            &["embedding", "vector", "vec_store", "batch insert"],
            "vec_store.rs",
            14.0,
        ),
        (
            &["embedding", "embed model", "embedding engine"],
            "embedding/mod.rs",
            10.0,
        ),
        (
            &["project structure", "directory stats"],
            "symbols/mod.rs",
            10.0,
        ),
        (
            &["scope", "scope analysis", "block scope"],
            "scope_analysis.rs",
            10.0,
        ),
        (
            &["import graph", "import resolution", "module resolution"],
            "import_graph",
            10.0,
        ),
        (
            &["rename", "word match", "refactor rename"],
            "rename.rs",
            10.0,
        ),
        (
            &["type hierarchy", "inheritance", "implements"],
            "type_hierarchy.rs",
            10.0,
        ),
    ];
    for (keywords, fragment, boost) in domain_affinities {
        let keyword_hit = keywords.iter().any(|kw| query_lower.contains(kw));
        if keyword_hit && file_path.contains(fragment) {
            prior += boost;
        }
    }
    // Non-source trees are rarely what implementation queries want.
    let non_source = ["benchmarks/", "models/", "docs/"]
        .iter()
        .any(|prefix| file_path.starts_with(prefix));
    if non_source {
        prior -= 14.0;
    }
    if file_path.contains("/tests") || file_path.ends_with("_tests.rs") {
        prior -= 8.0;
    }
    prior
}
/// Preset weight mixes for an externally-classified query type.
///
/// Unknown labels fall back to `RankWeights::default()`. The LSP weight is
/// 0.0 for every preset.
pub fn weights_for_query_type(query_type: &str) -> RankWeights {
    let (text, pagerank, recency, semantic) = match query_type {
        "identifier" => (0.70, 0.15, 0.05, 0.10),
        "natural_language" => (0.25, 0.15, 0.15, 0.45),
        "short_phrase" => (0.35, 0.15, 0.15, 0.35),
        _ => return RankWeights::default(),
    };
    RankWeights {
        text,
        pagerank,
        recency,
        semantic,
        lsp_signal: 0.0,
    }
}
#[cfg(test)]
#[allow(clippy::items_after_test_module)]
mod tests {
    use super::{auto_weights_with_semantic_count, symbol_kind_prior};
    use crate::{SymbolInfo, SymbolKind, SymbolProvenance};

    /// Minimal symbol fixture. Only `name`, `kind`, and `file_path` vary;
    /// every other field is inert for the heuristics under test (the LSP
    /// tests overwrite `line`/`end_line` where the span matters).
    fn fixture(name: &str, kind: SymbolKind, file_path: &str) -> SymbolInfo {
        SymbolInfo {
            name: name.into(),
            kind,
            file_path: file_path.into(),
            line: 1,
            column: 1,
            signature: String::new(),
            name_path: name.into(),
            id: name.into(),
            body: None,
            children: Vec::new(),
            start_byte: 0,
            end_byte: 0,
            provenance: SymbolProvenance::default(),
            end_line: 0,
        }
    }

    /// Function-kind fixture used by the LSP-signal tests.
    fn lsp_test_symbol(name: &str, file_path: &str) -> SymbolInfo {
        fixture(name, SymbolKind::Function, file_path)
    }

    #[test]
    fn short_phrase_prefers_text_over_semantic_even_with_rich_signal() {
        let weights = auto_weights_with_semantic_count("change function parameters", 8);
        assert!(weights.text > weights.semantic);
        assert_eq!(weights.text, 0.50);
        assert_eq!(weights.semantic, 0.30);
    }

    #[test]
    fn natural_language_kind_prior_prefers_functions_over_types() {
        let function_symbol = fixture(
            "dispatch_tool",
            SymbolKind::Function,
            "crates/codelens-mcp/src/dispatch.rs",
        );
        let type_symbol = fixture(
            "ToolHandler",
            SymbolKind::Class,
            "crates/codelens-mcp/src/tools/mod.rs",
        );
        let query = "route an incoming tool request to the right handler";
        assert!(
            symbol_kind_prior(query, &function_symbol) > symbol_kind_prior(query, &type_symbol)
        );
    }

    #[test]
    fn short_entrypoint_phrase_prefers_functions_over_edit_types() {
        let function_symbol = fixture(
            "move_symbol",
            SymbolKind::Function,
            "crates/codelens-engine/src/move_symbol.rs",
        );
        let type_symbol = fixture(
            "MoveEdit",
            SymbolKind::TypeAlias,
            "crates/codelens-engine/src/move_symbol.rs",
        );
        let query = "primary move handler";
        assert!(
            symbol_kind_prior(query, &function_symbol) > symbol_kind_prior(query, &type_symbol)
        );
    }

    #[test]
    fn inline_target_beats_generic_entrypoint_helpers() {
        let inline_symbol = fixture(
            "inline_function",
            SymbolKind::Function,
            "crates/codelens-engine/src/inline.rs",
        );
        let helper_symbol = fixture(
            "is_entry_point_file",
            SymbolKind::Function,
            "crates/codelens-engine/src/import_graph/dead_code.rs",
        );
        let query = "which entrypoint handles inline";
        assert!(
            symbol_kind_prior(query, &inline_symbol) > symbol_kind_prior(query, &helper_symbol)
        );
    }

    #[test]
    fn find_symbol_target_beats_generic_finders() {
        let target = fixture(
            "find_symbol",
            SymbolKind::Function,
            "crates/codelens-engine/src/symbols/mod.rs",
        );
        let generic = fixture(
            "find_files",
            SymbolKind::Function,
            "crates/codelens-engine/src/file_ops/reader.rs",
        );
        let query = "which helper implements find";
        assert!(symbol_kind_prior(query, &target) > symbol_kind_prior(query, &generic));
    }

    #[test]
    fn embedding_text_target_beats_generic_embedding_symbols() {
        let target = fixture(
            "build_embedding_text",
            SymbolKind::Function,
            "crates/codelens-engine/src/embedding/mod.rs",
        );
        let generic = fixture(
            "EmbeddingEngine",
            SymbolKind::Class,
            "crates/codelens-engine/src/embedding/mod.rs",
        );
        let query = "which builder creates build embedding text";
        assert!(symbol_kind_prior(query, &target) > symbol_kind_prior(query, &generic));
    }

    #[test]
    fn embedding_text_target_beats_other_build_helpers() {
        let target = fixture(
            "build_embedding_text",
            SymbolKind::Function,
            "crates/codelens-engine/src/embedding/mod.rs",
        );
        let generic = fixture(
            "build_coreml_execution_provider",
            SymbolKind::Function,
            "crates/codelens-engine/src/embedding/mod.rs",
        );
        let query = "which builder creates build embedding text";
        assert!(symbol_kind_prior(query, &target) > symbol_kind_prior(query, &generic));
    }

    #[test]
    fn embedding_text_target_beats_embed_texts_cached() {
        let target = fixture(
            "build_embedding_text",
            SymbolKind::Function,
            "crates/codelens-engine/src/embedding/mod.rs",
        );
        let generic = fixture(
            "embed_texts_cached",
            SymbolKind::Function,
            "crates/codelens-engine/src/embedding/mod.rs",
        );
        let query = "which builder creates build embedding text";
        assert!(symbol_kind_prior(query, &target) > symbol_kind_prior(query, &generic));
    }

    #[test]
    fn exact_word_match_target_beats_generic_find() {
        let exact = fixture(
            "find_all_word_matches",
            SymbolKind::Function,
            "crates/codelens-engine/src/rename.rs",
        );
        let generic = fixture(
            "find_symbol",
            SymbolKind::Function,
            "crates/codelens-engine/src/symbols/mod.rs",
        );
        let query = "which helper implements find all word matches";
        assert!(symbol_kind_prior(query, &exact) > symbol_kind_prior(query, &generic));
    }

    #[test]
    fn file_scoped_word_match_target_beats_broader_helper() {
        let exact = fixture(
            "find_word_matches_in_files",
            SymbolKind::Function,
            "crates/codelens-engine/src/rename.rs",
        );
        let broader = fixture(
            "find_all_word_matches",
            SymbolKind::Function,
            "crates/codelens-engine/src/rename.rs",
        );
        let query = "which helper implements find word matches in files";
        assert!(symbol_kind_prior(query, &exact) > symbol_kind_prior(query, &broader));
    }

    /// Text-only weight mix plus an adjustable LSP weight and boost map.
    fn lsp_flat_context(
        lsp_boost_refs: super::HashMap<String, Vec<usize>>,
        lsp_weight: f64,
    ) -> super::RankingContext {
        super::RankingContext {
            pagerank: super::HashMap::new(),
            recent_files: super::HashMap::new(),
            semantic_scores: super::HashMap::new(),
            lsp_boost_refs,
            weights: super::RankWeights {
                text: 1.0,
                pagerank: 0.0,
                recency: 0.0,
                semantic: 0.0,
                lsp_signal: lsp_weight,
            },
        }
    }

    /// Boost map with a single file whose ref lines are sorted ascending.
    fn boost_refs_at(file: &str, lines: &[usize]) -> super::HashMap<String, Vec<usize>> {
        let mut sorted = lines.to_vec();
        sorted.sort();
        let mut map = super::HashMap::new();
        map.insert(file.to_owned(), sorted);
        map
    }

    #[test]
    fn lsp_signal_weight_zero_is_neutral() {
        let in_boost = lsp_test_symbol("handler_a", "crates/x/src/a.rs");
        let not_in_boost = lsp_test_symbol("handler_b", "crates/x/src/b.rs");
        let ctx = lsp_flat_context(boost_refs_at("crates/x/src/a.rs", &[1]), 0.0);
        let ranked = super::rank_symbols("handler", vec![in_boost, not_in_boost], &ctx);
        assert_eq!(ranked.len(), 2);
        assert_eq!(
            ranked[0].1, ranked[1].1,
            "with lsp_signal=0.0 the boost must contribute nothing"
        );
    }

    #[test]
    fn lsp_signal_rescues_candidate_with_zero_text_score() {
        let caller = lsp_test_symbol("unrelated_caller", "crates/x/src/caller.rs");
        let ctx = lsp_flat_context(boost_refs_at("crates/x/src/caller.rs", &[1]), 0.5);
        let ranked = super::rank_symbols("rank_symbols", vec![caller], &ctx);
        assert_eq!(
            ranked.len(),
            1,
            "rescued caller with a nearby ref must survive the gate"
        );
        assert!(
            ranked[0].1 >= 1,
            "rescued caller must still get a positive blended score"
        );
    }

    #[test]
    fn lsp_signal_gate_stays_closed_when_weight_is_zero() {
        let caller = lsp_test_symbol("unrelated_caller", "crates/x/src/caller.rs");
        let ctx = lsp_flat_context(boost_refs_at("crates/x/src/caller.rs", &[1]), 0.0);
        let ranked = super::rank_symbols("rank_symbols", vec![caller], &ctx);
        assert!(
            ranked.is_empty(),
            "with lsp_signal=0.0 the gate must still drop zero-text candidates"
        );
    }

    #[test]
    fn lsp_signal_proximity_prefers_nearer_ref_lines() {
        // `near` contains the ref at line 20 in its 10..=30 span; `far` sits
        // entirely before the ref.
        let mut near = lsp_test_symbol("near_caller", "crates/x/src/caller.rs");
        near.line = 10;
        near.end_line = 30;
        let mut far = lsp_test_symbol("far_caller", "crates/x/src/caller.rs");
        far.line = 1;
        let ctx = lsp_flat_context(boost_refs_at("crates/x/src/caller.rs", &[20]), 0.5);
        let ranked = super::rank_symbols("rank_symbols", vec![far, near], &ctx);
        assert_eq!(ranked.len(), 2, "both candidates must survive the gate");
        assert_eq!(
            ranked[0].0.name, "near_caller",
            "a single-ref container must outrank an outside-span neighbour"
        );
        assert!(
            ranked[0].1 > ranked[1].1,
            "containment must produce a strictly higher blended score"
        );
    }

    #[test]
    fn lsp_signal_multi_ref_container_beats_single_ref_container() {
        // `heavy` encloses three refs; `light` encloses one.
        let mut heavy = lsp_test_symbol("heavy_caller", "crates/x/src/caller.rs");
        heavy.line = 10;
        heavy.end_line = 50;
        let mut light = lsp_test_symbol("light_caller", "crates/x/src/caller.rs");
        light.line = 60;
        light.end_line = 100;
        let ctx = lsp_flat_context(
            boost_refs_at("crates/x/src/caller.rs", &[15, 25, 35, 80]),
            0.5,
        );
        let ranked = super::rank_symbols("rank_symbols", vec![light, heavy], &ctx);
        assert_eq!(ranked.len(), 2, "both containers must survive the gate");
        assert_eq!(
            ranked[0].0.name, "heavy_caller",
            "the container enclosing more refs must rank first"
        );
        assert!(
            ranked[0].1 > ranked[1].1,
            "multi-ref containment must dominate single-ref containment"
        );
    }

    #[test]
    fn lsp_signal_ignores_refs_above_window() {
        let caller = lsp_test_symbol("unrelated_caller", "crates/x/src/caller.rs");
        let far_ref = super::LSP_PROXIMITY_WINDOW_LINES + 50;
        let ctx = lsp_flat_context(boost_refs_at("crates/x/src/caller.rs", &[far_ref]), 0.5);
        let ranked = super::rank_symbols("rank_symbols", vec![caller], &ctx);
        assert!(
            ranked.is_empty(),
            "refs beyond the proximity window must not rescue zero-text candidates"
        );
    }

    #[test]
    fn lsp_signal_containment_beats_nearby_non_container() {
        let mut container = lsp_test_symbol("container_fn", "crates/x/src/caller.rs");
        container.line = 10;
        container.end_line = 200;
        let mut preceding = lsp_test_symbol("preceding_fn", "crates/x/src/caller.rs");
        preceding.line = 50;
        preceding.end_line = 60;
        let ctx = lsp_flat_context(boost_refs_at("crates/x/src/caller.rs", &[150]), 0.5);
        let ranked = super::rank_symbols("rank_symbols", vec![preceding, container], &ctx);
        assert_eq!(ranked.len(), 2, "both candidates must survive the gate");
        assert_eq!(
            ranked[0].0.name, "container_fn",
            "the symbol whose span contains the ref must rank first"
        );
    }

    #[test]
    fn lsp_signal_interface_container_beats_name_match_helper_when_text_score_zero() {
        let mut container = lsp_test_symbol("ZodType", "packages/zod/src/v4/classic/schemas.ts");
        container.line = 20;
        container.end_line = 151;
        let mut helper =
            lsp_test_symbol("SafeParseReturnType", "packages/zod/src/v4/core/parse.ts");
        helper.line = 156;
        helper.end_line = 156;
        let boost = boost_refs_at(
            "packages/zod/src/v4/classic/schemas.ts",
            &[58, 60, 196, 255, 256],
        );
        let ctx = lsp_flat_context(boost, 0.25);
        let ranked = super::rank_symbols("safeParse", vec![helper, container], &ctx);
        let container_rank = ranked
            .iter()
            .position(|(s, _)| s.name == "ZodType")
            .expect("ZodType must survive the gate");
        let helper_rank = ranked
            .iter()
            .position(|(s, _)| s.name == "SafeParseReturnType");
        if let Some(h) = helper_rank {
            assert!(
                container_rank < h,
                "interface container with multi-ref containment must outrank \
                partial-name-match helper (container at {container_rank}, helper at {h})"
            );
        }
    }

    #[test]
    fn lsp_signal_weight_positive_promotes_lsp_file() {
        let in_boost = lsp_test_symbol("handler_a", "crates/x/src/a.rs");
        let not_in_boost = lsp_test_symbol("handler_b", "crates/x/src/b.rs");
        let ctx = lsp_flat_context(boost_refs_at("crates/x/src/a.rs", &[1]), 0.5);
        let ranked = super::rank_symbols("handler", vec![not_in_boost, in_boost], &ctx);
        assert_eq!(ranked.len(), 2);
        assert_eq!(
            ranked[0].0.file_path, "crates/x/src/a.rs",
            "LSP-flagged file must rank first when lsp_signal > 0"
        );
        assert!(
            ranked[0].1 > ranked[1].1,
            "LSP-boosted score must strictly exceed the non-boosted baseline"
        );
    }
}
/// Scores and sorts candidate symbols for `query` using the weighted signal
/// mix in `ctx`.
///
/// Each surviving candidate's blended score sums the weighted text, pagerank,
/// recency, semantic, and LSP-proximity components plus the kind and
/// file-path priors. Candidates with a zero text score are dropped unless
/// rescued by a semantic score >= 0.08 or an active LSP proximity signal.
/// Results are `(symbol, score)` pairs sorted descending; when more than 200
/// candidates survive, only the top 100 are returned.
pub(crate) fn rank_symbols(
    query: &str,
    symbols: Vec<SymbolInfo>,
    ctx: &RankingContext,
) -> Vec<(SymbolInfo, i32)> {
    // File count used to rescale per-file pagerank below — assumes pagerank
    // scores are normalized across the ranked files (TODO confirm); the
    // min(100.0) cap bounds the component either way.
    let pr_count = ctx.pagerank.len().max(1) as f64;
    let has_semantic = !ctx.semantic_scores.is_empty();
    let query_lower = query.to_lowercase();
    // Max semantic score, floored at 0.01 so the normalization below never
    // divides by zero.
    let sem_max = if has_semantic {
        ctx.semantic_scores
            .values()
            .copied()
            .fold(0.0f64, f64::max)
            .max(0.01)
    } else {
        1.0
    };
    // Reused lookup-key buffer: avoids one String allocation per candidate.
    let mut sem_key_buf = String::with_capacity(128);
    // "foo bar-baz" -> "foo_bar_baz", letting multi-word queries match
    // snake_case identifiers in the text scorer.
    let joined_snake = query_lower.replace(|c: char| c.is_whitespace() || c == '-', "_");
    let mut scored: Vec<(SymbolInfo, i32)> = symbols
        .into_iter()
        .filter_map(|symbol| {
            let text_score =
                score_symbol_with_lower(query, &query_lower, &joined_snake, &symbol).unwrap_or(0);
            // Semantic scores are keyed by "file_path:name".
            let sem_score = if has_semantic {
                sem_key_buf.clear();
                sem_key_buf.push_str(&symbol.file_path);
                sem_key_buf.push(':');
                sem_key_buf.push_str(&symbol.name);
                ctx.semantic_scores
                    .get(sem_key_buf.as_str())
                    .copied()
                    .unwrap_or(0.0)
            } else {
                0.0
            };
            // 0.0..=1.0 factor from LSP reference lines near the symbol span.
            let lsp_proximity = ctx
                .lsp_boost_refs
                .get(&symbol.file_path)
                .map(|lines| lsp_proximity_factor(lines, symbol.line, symbol.end_line))
                .unwrap_or(0.0);
            let lsp_rescued = ctx.weights.lsp_signal > 0.0 && lsp_proximity > 0.0;
            // Gate: drop zero-text candidates unless semantics or the LSP
            // signal rescue them.
            if text_score == 0 && (!has_semantic || sem_score < 0.08) && !lsp_rescued {
                return None;
            }
            // Strong LSP proximity (>= 0.6) substitutes a synthetic text
            // component so a rescued candidate is not stuck at zero.
            let text_component =
                if text_score == 0 && ctx.weights.lsp_signal > 0.0 && lsp_proximity >= 0.6 {
                    70.0 * lsp_proximity * ctx.weights.text
                } else {
                    text_score as f64 * ctx.weights.text
                };
            let pr = ctx.pagerank.get(&symbol.file_path).copied().unwrap_or(0.0);
            // Scale by file count so sparse graphs still register; cap at 100
            // to match the other signals' range.
            let pr_scaled = (pr * 100.0 * pr_count).min(100.0);
            let pr_component = pr_scaled * ctx.weights.pagerank;
            let recency = ctx
                .recent_files
                .get(&symbol.file_path)
                .copied()
                .unwrap_or(0.0);
            let recency_component = (recency * 100.0).min(100.0) * ctx.weights.recency;
            let sem_normalized = (sem_score / sem_max * 100.0).min(100.0);
            let semantic_component = sem_normalized * ctx.weights.semantic;
            let lsp_component = 100.0 * ctx.weights.lsp_signal * lsp_proximity;
            // The cast truncates toward zero; survivors get a floor of 1 so a
            // gated-in candidate never reports a zero/negative score.
            let blended = (text_component
                + pr_component
                + recency_component
                + semantic_component
                + lsp_component
                + symbol_kind_prior(&query_lower, &symbol)
                + file_path_prior(&query_lower, &symbol.file_path))
                as i32;
            Some((symbol, blended.max(1)))
        })
        .collect();
    // For large candidate sets, select the top K in O(n) first, then sort
    // only those K; small sets are just sorted outright.
    const PARTIAL_SORT_K: usize = 100;
    if scored.len() > PARTIAL_SORT_K * 2 {
        scored.select_nth_unstable_by(PARTIAL_SORT_K, |a, b| b.1.cmp(&a.1));
        scored.truncate(PARTIAL_SORT_K);
        scored.sort_unstable_by(|a, b| b.1.cmp(&a.1));
    } else {
        scored.sort_unstable_by(|a, b| b.1.cmp(&a.1));
    }
    scored
}
/// Greedily packs top-ranked entries into a character budget derived from
/// `max_tokens` (4 chars per token).
///
/// Iterates `scored` in its given order, stopping at the first entry that
/// would overflow the budget (the very first entry is always admitted).
/// Returns `(entries, chars_used, pruned_count, last_kept_score)`, where
/// `last_kept_score` is the blended score of the last entry admitted (0.0
/// when nothing was admitted).
pub(crate) fn prune_to_budget(
    scored: Vec<(SymbolInfo, i32)>,
    max_tokens: usize,
    include_body: bool,
    project_root: &Path,
) -> (Vec<RankedContextEntry>, usize, usize, f64) {
    // Cap on how many source files are read and cached, scaled with budget.
    let file_cache_limit = (max_tokens / 200).clamp(32, 128);
    // Rough 4-chars-per-token conversion.
    let char_budget = max_tokens.saturating_mul(4);
    let mut remaining = char_budget;
    // Whole-file contents cache; `None` records a failed read (or a read
    // skipped because the cache was full) so the path is not retried.
    let mut file_cache: HashMap<String, Option<String>> = HashMap::new();
    let mut selected = Vec::new();
    let total = scored.len();
    let mut last_kept_score: f64 = 0.0;
    for (symbol, score) in scored {
        // Load the symbol body from disk only when requested and the byte
        // span is non-empty.
        let body = if include_body && symbol.end_byte > symbol.start_byte {
            // Captured before `entry()` because the closure below cannot
            // borrow `file_cache` while the entry is held.
            let cache_full = file_cache.len() >= file_cache_limit;
            let source = file_cache
                .entry(symbol.file_path.clone())
                .or_insert_with(|| {
                    if cache_full {
                        return None;
                    }
                    let abs = project_root.join(&symbol.file_path);
                    std::fs::read_to_string(&abs).ok()
                });
            source
                .as_deref()
                .map(|s| slice_source(s, symbol.start_byte, symbol.end_byte))
        } else {
            None
        };
        let entry = RankedContextEntry {
            name: symbol.name,
            kind: symbol.kind.as_label().to_owned(),
            file: symbol.file_path,
            line: symbol.line,
            signature: symbol.signature,
            body,
            relevance_score: score,
        };
        // Rough serialized size: field lengths plus ~80 chars of overhead.
        let entry_size = entry.name.len()
            + entry.kind.len()
            + entry.file.len()
            + entry.signature.len()
            + entry.body.as_ref().map(|b| b.len()).unwrap_or(0)
            + 80;
        // Stop at the first overflow — except the first entry, which is
        // always admitted so the result is never empty when input exists.
        if remaining < entry_size && !selected.is_empty() {
            break;
        }
        remaining = remaining.saturating_sub(entry_size);
        last_kept_score = score as f64;
        selected.push(entry);
    }
    let pruned_count = total.saturating_sub(selected.len());
    let chars_used = char_budget.saturating_sub(remaining);
    (selected, chars_used, pruned_count, last_kept_score)
}