use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;
use crate::ranking::boosting::is_symbol_query;
use crate::ranking::penalties::file_path_penalty;
use crate::tokens::tokenize;
use crate::{SearchResult, SourceIndex};
use super::{
exact_shard, AsrError, AsrResult, AsrSearchResult, ReadyIndexSnapshot, MAX_TOP_K,
RANK_BOOST_DEFINES_SYMBOL, RANK_BOOST_FILE_NAME_CONTAINS, RANK_BOOST_FILE_STEM_EXACT,
RANK_BOOST_PATH_CONTAINS, RANK_THRESHOLD_EXACT_INDEX,
};
/// Runs `query` against a ready index snapshot and returns at most `top_k` merged results.
///
/// A search index is rebuilt from the snapshot's stored chunks and queried alongside the
/// snapshot's exact shard. Both sources are asked for an over-fetched pool of candidates
/// (`top_k * 4`, clamped to `16..=MAX_TOP_K`) so that the re-ranking performed by
/// `merge_search_results` has more than `top_k` candidates to choose from.
///
/// # Errors
///
/// Propagates the error returned by `index_from_snapshot` when the index cannot be built.
pub(crate) fn search_results_from_snapshot(
    query: &str,
    snapshot: &ReadyIndexSnapshot,
    top_k: usize,
) -> AsrResult<Vec<SearchResult>> {
    let rebuilt_index = index_from_snapshot(&snapshot.chunks)?;

    // NOTE(review): `clamp` panics when min > max, i.e. if MAX_TOP_K were ever below 16 —
    // confirm the constant's value.
    let pool_size = top_k.saturating_mul(4).clamp(16, MAX_TOP_K);

    let from_backend = rebuilt_index.search(query, pool_size, None, None, None);
    let from_exact_shard = snapshot.exact_shard.search(query, pool_size);

    let merged = merge_search_results(query, from_exact_shard, from_backend, top_k);
    Ok(merged)
}
/// Converts an internal [`SearchResult`] into the outward-facing [`AsrSearchResult`].
///
/// Location and language are copied from the chunk, the score is rounded with
/// `round_score`, and the list of match reasons is derived from `query` via
/// `reasons_for_result`.
pub(crate) fn asr_search_result(query: &str, result: &SearchResult) -> AsrSearchResult {
    let chunk = &result.chunk;
    let reasons = reasons_for_result(query, result);
    let score = round_score(result.score);

    AsrSearchResult {
        path: chunk.file_path.clone(),
        start_line: chunk.start_line,
        end_line: chunk.end_line,
        language: chunk.language.clone(),
        score,
        reasons,
        match_lines: result.match_lines.clone(),
    }
}
/// Builds the sorted, de-duplicated list of reason tags explaining why `result` matched.
///
/// Tags emitted:
/// * `verified_match` plus `regex_verified_match` / `literal_verified_match` when the
///   result carries match lines (the regex variant is used when the trimmed query starts
///   with `re:`);
/// * `path_match` / `content_match` when the query (without a leading `re:`) occurs,
///   ASCII-case-insensitively, in the chunk's path / content;
/// * `persistent_exact_shard` or `exact_index` depending on which score threshold the
///   result reaches;
/// * `bm25_chunk_match` as the fallback when nothing else applied.
pub(crate) fn reasons_for_result(query: &str, result: &SearchResult) -> Vec<String> {
    let trimmed = query.trim();
    let (is_regex_query, literal_query) = match trimmed.strip_prefix("re:") {
        Some(pattern) => (true, pattern),
        None => (false, trimmed),
    };
    let needle = literal_query.to_ascii_lowercase();

    let mut tags: Vec<&'static str> = Vec::new();

    if !result.match_lines.is_empty() {
        tags.push("verified_match");
        tags.push(if is_regex_query {
            "regex_verified_match"
        } else {
            "literal_verified_match"
        });
    }

    // An empty needle would trivially be "contained" everywhere, so skip both checks.
    if !needle.is_empty() {
        if result.chunk.file_path.to_ascii_lowercase().contains(&needle) {
            tags.push("path_match");
        }
        if result.chunk.content.to_ascii_lowercase().contains(&needle) {
            tags.push("content_match");
        }
    }

    // The two score tiers are mutually exclusive; the higher floor is checked first.
    let score = result.score;
    if score >= exact_shard::persistent_exact_score_floor() {
        tags.push("persistent_exact_shard");
    } else if score >= RANK_THRESHOLD_EXACT_INDEX {
        tags.push("exact_index");
    }

    if tags.is_empty() {
        tags.push("bm25_chunk_match");
    }

    tags.sort_unstable();
    tags.dedup();
    tags.into_iter().map(String::from).collect()
}
/// Rounds `score` to three decimal places (scale by 1000, round, scale back).
pub(crate) fn round_score(score: f64) -> f64 {
    const SCALE: f64 = 1000.0;
    let scaled = score * SCALE;
    scaled.round() / SCALE
}
/// Builds a [`SourceIndex`] from a copy of the stored `chunks`.
///
/// # Errors
///
/// Returns an [`AsrError`] with code `repo_index_corrupt` when
/// `SourceIndex::from_chunks_bm25` fails; the underlying error is embedded in the message.
fn index_from_snapshot(chunks: &[crate::Chunk]) -> AsrResult<SourceIndex> {
    match SourceIndex::from_chunks_bm25(chunks.to_vec()) {
        Ok(index) => Ok(index),
        Err(err) => Err(AsrError::new(
            "repo_index_corrupt",
            format!("Failed to build search index from stored chunks: {err:#}"),
        )),
    }
}
/// Merges exact-shard and backend results, re-ranks them, and keeps the best `top_k`.
///
/// Results are identified by `(file_path, start_line, end_line)`. When both sources return
/// the same chunk, the first occurrence is kept, its score is raised to the higher of the
/// two, and the match lines of both are unioned (sorted by line then content, duplicates
/// removed).
///
/// The merged list is ordered by, in turn: descending rank score (see
/// `asr_result_rank_score_impl`), descending raw score, then ascending path, start line and
/// end line so the output is deterministic.
///
/// NaN scores are ordered as if they were the lowest possible score. Comparing raw floats
/// with `partial_cmp(..).unwrap_or(Equal)` is not a total order once a NaN is present, and
/// `sort_by` may then panic or produce an unspecified order.
fn merge_search_results(
    query: &str,
    exact_results: Vec<SearchResult>,
    backend_results: Vec<SearchResult>,
    top_k: usize,
) -> Vec<SearchResult> {
    /// Maps NaN to negative infinity so score comparisons form a consistent order.
    fn sort_key(score: f64) -> f64 {
        if score.is_nan() {
            f64::NEG_INFINITY
        } else {
            score
        }
    }

    let mut merged: Vec<SearchResult> = Vec::new();
    // Chunk identity -> index of that chunk inside `merged`.
    let mut positions: BTreeMap<(String, usize, usize), usize> = BTreeMap::new();

    for mut result in exact_results.into_iter().chain(backend_results) {
        let key = (
            result.chunk.file_path.clone(),
            result.chunk.start_line,
            result.chunk.end_line,
        );
        match positions.get(&key).copied() {
            Some(pos) => {
                let existing = &mut merged[pos];
                if result.score > existing.score {
                    existing.score = result.score;
                }
                // Append everything, then let the stable sort + adjacent dedup remove
                // repeats; the first occurrence of each (line, content) pair survives.
                existing.match_lines.append(&mut result.match_lines);
                existing
                    .match_lines
                    .sort_by(|a, b| a.line.cmp(&b.line).then_with(|| a.content.cmp(&b.content)));
                existing
                    .match_lines
                    .dedup_by(|a, b| a.line == b.line && a.content == b.content);
            }
            None => {
                positions.insert(key, merged.len());
                merged.push(result);
            }
        }
    }

    let matcher = SymbolMatcher::from_query(query);
    let mut scored: Vec<(f64, SearchResult)> = merged
        .into_iter()
        .map(|r| (asr_result_rank_score_impl(query, &r, matcher.as_ref()), r))
        .collect();

    scored.sort_by(|(rank_a, a), (rank_b, b)| {
        // Operands are swapped on the score comparisons to get descending order.
        sort_key(*rank_b)
            .partial_cmp(&sort_key(*rank_a))
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| {
                sort_key(b.score)
                    .partial_cmp(&sort_key(a.score))
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| a.chunk.file_path.cmp(&b.chunk.file_path))
            .then_with(|| a.chunk.start_line.cmp(&b.chunk.start_line))
            .then_with(|| a.chunk.end_line.cmp(&b.chunk.end_line))
    });
    scored.truncate(top_k);
    scored.into_iter().map(|(_, r)| r).collect()
}
/// Pre-compiled regular expressions that recognise a declaration of one specific symbol.
///
/// Built once per query by `SymbolMatcher::from_query` and then applied to chunk contents
/// through `defines_symbol`.
struct SymbolMatcher {
// The three declaration patterns; content "defines" the symbol if any one of them matches.
patterns: [regex::Regex; 3],
}
impl SymbolMatcher {
    /// Builds a matcher for the symbol named by `query`, if the query looks like a symbol.
    ///
    /// The symbol is the last segment of the trimmed query after removing leading `re:`
    /// prefixes and splitting on `:`, `.`, `/` and `\`. Returns `None` when that segment is
    /// shorter than three bytes, is rejected by `is_symbol_query`, or when any of the
    /// patterns fails to compile.
    ///
    /// Three patterns are compiled around the regex-escaped symbol:
    /// 1. optional modifiers (`pub`, `export`, `static`, ...) followed by a declaration
    ///    keyword (`fn`, `class`, `struct`, `const`, ...) and the symbol;
    /// 2. `data class` / `abstract class` followed by the symbol;
    /// 3. `func (<receiver>) <symbol>`.
    fn from_query(query: &str) -> Option<Self> {
        let unprefixed = query.trim().trim_start_matches("re:");
        let symbol = unprefixed
            .rsplit([':', '.', '/', '\\'])
            .next()
            .unwrap_or("")
            .trim();

        let long_enough = symbol.len() >= 3;
        if !(long_enough && is_symbol_query(symbol)) {
            return None;
        }

        let escaped = regex::escape(symbol);
        let compile = |source: String| regex::Regex::new(&source).ok();

        let keyword_declaration = compile(format!(
            r"(?m)(?:^|\s)(?:pub\s+|public\s+|private\s+|internal\s+|fileprivate\s+|open\s+|export\s+|async\s+|static\s+)*{}\s+{}(?:\s|[<({{\[=:;]|$)",
            r"(?:fn|func|fun|function|class|struct|enum|trait|protocol|interface|type|actor|object|const|let|var)",
            escaped
        ))?;
        let qualified_class = compile(format!(
            r"(?m)(?:^|\s)(?:data\s+class|abstract\s+class)\s+{}(?:\s|[<({{\[=:;]|$)",
            escaped
        ))?;
        let receiver_func = compile(format!(
            r"(?m)(?:^|\s)func\s+\([^)]*\)\s+{}(?:\s|[<({{\[=:;]|$)",
            escaped
        ))?;

        Some(Self {
            patterns: [keyword_declaration, qualified_class, receiver_func],
        })
    }

    /// Returns `true` when any of the declaration patterns matches somewhere in `content`.
    fn defines_symbol(&self, content: &str) -> bool {
        for pattern in &self.patterns {
            if pattern.is_match(content) {
                return true;
            }
        }
        false
    }
}
/// Computes the rank score used to order merged results for `query`.
///
/// Starting from the result's raw score:
/// 1. multiplicative adjustments are applied — the path penalty from `file_path_penalty`,
///    the term-coverage factor from `query_coverage_multiplier`, and the UI-path factor
///    from `intent_path_multiplier`;
/// 2. `RANK_BOOST_DEFINES_SYMBOL` is added when `matcher` is present and the chunk content
///    declares the queried symbol;
/// 3. exactly one path boost is added, the first that applies: file stem equals the query,
///    file name contains the query, or the full path contains the query.
///
/// The query is trimmed, stripped of leading `re:`, has `\` replaced by `/`, and is
/// ASCII-lowercased before comparison. An empty normalized query returns the raw score
/// unchanged.
///
/// The file name and stem are taken from the *normalized* path (backslashes converted to
/// `/`). Taking them from the raw path would, on hosts where `\` is not a path separator,
/// treat an entire `dir\file.rs` string as the file name and misclassify the path boosts.
fn asr_result_rank_score_impl(
    query: &str,
    result: &SearchResult,
    matcher: Option<&SymbolMatcher>,
) -> f64 {
    let mut score = result.score;
    let normalized_query = query
        .trim()
        .trim_start_matches("re:")
        .replace('\\', "/")
        .to_ascii_lowercase();
    if normalized_query.is_empty() {
        return score;
    }

    let path = result
        .chunk
        .file_path
        .replace('\\', "/")
        .to_ascii_lowercase();
    // `path` is already lowercase and uses `/` only, so the derived name and stem are too.
    let normalized_path = Path::new(&path);
    let file_name = normalized_path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");
    let file_stem = normalized_path
        .file_stem()
        .and_then(|stem| stem.to_str())
        .unwrap_or("");

    // Multiplicative factors first, so the additive boosts below are not scaled down.
    score *= file_path_penalty(&result.chunk.file_path);
    score *= query_coverage_multiplier(&normalized_query, result);
    score *= intent_path_multiplier(&normalized_query, &path, file_name);

    if let Some(m) = matcher {
        if m.defines_symbol(&result.chunk.content) {
            score += RANK_BOOST_DEFINES_SYMBOL;
        }
    }

    // Most specific path match wins; the boosts are mutually exclusive.
    if file_stem == normalized_query {
        score += RANK_BOOST_FILE_STEM_EXACT;
    } else if file_name.contains(&normalized_query) {
        score += RANK_BOOST_FILE_NAME_CONTAINS;
    } else if path.contains(&normalized_query) {
        score += RANK_BOOST_PATH_CONTAINS;
    }
    score
}
/// Returns a score multiplier that demotes UI files when the query shows no UI intent.
///
/// Yields `1.0` when `query_has_ui_intent` accepts the query or when the file does not look
/// like UI code; otherwise `0.72`.
///
/// A file looks like UI code when one of its directories is named `components`, `views`
/// or `screens`, or when its name ends in one of the known UI suffixes. Directory names
/// are matched per path segment, so a root-level `components/Button.tsx` in a relative
/// path is recognised just like `src/components/Button.tsx`; a substring test for
/// `"/components/"` would miss the former.
///
/// `path` is expected to use `/` separators, and both `path` and `file_name` are expected
/// to be lowercase for the directory and `*.swift` suffix checks to apply.
fn intent_path_multiplier(normalized_query: &str, path: &str, file_name: &str) -> f64 {
    const UI_DIRECTORIES: [&str; 3] = ["components", "views", "screens"];
    const UI_FILE_SUFFIXES: [&str; 9] = [
        "view.swift",
        "screen.swift",
        "component.swift",
        ".view.tsx",
        ".view.ts",
        ".component.tsx",
        ".component.ts",
        ".jsx",
        ".tsx",
    ];
    const UI_PATH_PENALTY: f64 = 0.72;

    if query_has_ui_intent(normalized_query) {
        return 1.0;
    }

    // Everything before the final `/` is a directory; drop the last segment (the file).
    let mut directories = path.split('/');
    let _ = directories.next_back();
    let in_ui_directory = directories.any(|dir| UI_DIRECTORIES.contains(&dir));

    let has_ui_suffix = UI_FILE_SUFFIXES
        .iter()
        .any(|suffix| file_name.ends_with(*suffix));

    if in_ui_directory || has_ui_suffix {
        UI_PATH_PENALTY
    } else {
        1.0
    }
}
fn query_has_ui_intent(normalized_query: &str) -> bool {
let terms = meaningful_query_terms(normalized_query);
terms.iter().any(|term| {
matches!(
term.as_str(),
"view"
| "views"
| "screen"
| "screens"
| "ui"
| "ux"
| "layout"
| "component"
| "components"
| "button"
| "design"
| "style"
| "css"
| "animation"
)
})
}
/// Scales a score by how many of the query's meaningful terms the result covers.
///
/// A term counts as covered when it occurs as a substring of the ASCII-lowercased chunk
/// content or of the lowercased, `/`-normalized file path. Queries with fewer than two
/// meaningful terms are not adjusted (`1.0`). Otherwise the multiplier ranges linearly
/// from `0.35` (no term covered) to `1.0` (all terms covered).
fn query_coverage_multiplier(normalized_query: &str, result: &SearchResult) -> f64 {
    const FLOOR: f64 = 0.35;
    const SPAN: f64 = 0.65;

    let terms = meaningful_query_terms(normalized_query);
    let total = terms.len();
    if total < 2 {
        return 1.0;
    }

    let haystack_content = result.chunk.content.to_ascii_lowercase();
    let haystack_path = result
        .chunk
        .file_path
        .replace('\\', "/")
        .to_ascii_lowercase();

    let mut covered = 0usize;
    for term in &terms {
        let needle = term.as_str();
        if haystack_content.contains(needle) || haystack_path.contains(needle) {
            covered += 1;
        }
    }

    let coverage = covered as f64 / total as f64;
    FLOOR + (SPAN * coverage)
}
/// Extracts the distinct, ASCII-lowercased terms of `query` that carry meaning.
///
/// Tokens produced by `tokenize` are dropped when they are shorter than three bytes or
/// listed in `QUERY_STOPWORDS`. The result is an ordered set, so duplicates collapse.
fn meaningful_query_terms(query: &str) -> BTreeSet<String> {
    let mut terms = BTreeSet::new();
    for token in tokenize(query) {
        let term = token.to_ascii_lowercase();
        let too_short = term.len() < 3;
        if too_short || QUERY_STOPWORDS.contains(&term.as_str()) {
            continue;
        }
        terms.insert(term);
    }
    terms
}
/// Lowercase filler words that `meaningful_query_terms` removes from a query.
///
/// Every entry has at least three characters; shorter tokens are already discarded by the
/// length filter in `meaningful_query_terms`.
const QUERY_STOPWORDS: &[&str] = &[
"and", "are", "but", "can", "for", "from", "has", "have", "how", "into", "not", "the", "then",
"this", "that", "what", "when", "where", "which", "with", "without",
];
// Unit tests for the merge/re-rank pipeline and the UI-intent path multiplier.
#[cfg(test)]
mod tests {
use super::*;
use crate::Chunk;
/// Builds a one-line result at `path` with the given raw `score` and fixed placeholder content.
fn result(path: &str, score: f64) -> SearchResult {
result_with_content(path, score, "pub mod openai;\n", 1, 1)
}
/// Builds a result with explicit content and line range, tagged as Rust, with no match lines.
fn result_with_content(
path: &str,
score: f64,
content: &str,
start_line: usize,
end_line: usize,
) -> SearchResult {
SearchResult {
chunk: Chunk::new(
content.to_string(),
path.to_string(),
start_line,
end_line,
Some("rust".to_string()),
),
score,
match_lines: Vec::new(),
}
}
/// A low-scored backend hit whose file stem equals the query must outrank high-scored
/// exact hits in files whose names do not contain the query.
#[test]
fn merge_search_results_prefers_filename_match_over_references() {
let merged = merge_search_results(
"openai",
vec![
result("src/api/mod.rs", 20_001.0),
result("src/main.rs", 20_001.0),
result("tests/api_surface.rs", 20_001.0),
],
vec![result("src/api/openai.rs", 8.0)],
3,
);
assert_eq!(merged[0].chunk.file_path, "src/api/openai.rs");
}
/// For a multi-term query, a result covering every term must outrank a slightly
/// higher-scored result that only repeats some of the terms.
#[test]
fn merge_search_results_prefers_broader_query_coverage() {
let merged = merge_search_results(
"routine completion reward pixel log",
vec![],
vec![
result_with_content(
"src/pixel_links.rs",
100.0,
"completion log completion log completion log",
1,
1,
),
result_with_content(
"src/routine_completion.rs",
95.0,
"routine completion reward pixel log",
1,
1,
),
],
2,
);
assert_eq!(merged[0].chunk.file_path, "src/routine_completion.rs");
}
/// The chunk that declares the queried symbol must outrank a higher-scored chunk that
/// merely references it.
#[test]
fn merge_search_results_prefers_symbol_definition_over_references() {
let merged = merge_search_results(
"AppConfig",
vec![
result_with_content(
"src/orchestrator.rs",
20_004.0,
"fn build(config: AppConfig) -> AppConfig { config }\n",
1,
1,
),
result_with_content(
"src/config.rs",
20_002.0,
"pub struct AppConfig {\n pub name: String,\n}\n",
1,
3,
),
],
vec![],
2,
);
assert_eq!(merged[0].chunk.file_path, "src/config.rs");
}
/// The non-test source file that defines the function must outrank a `.test.js` file
/// with a slightly higher raw score that only calls it.
#[test]
fn merge_search_results_penalizes_test_paths_after_exact_merge() {
let merged = merge_search_results(
"planArtifactPanelReset",
vec![
result_with_content(
"src/artifact_panel_state.test.js",
20_002.0,
"planArtifactPanelReset();\n",
1,
1,
),
result_with_content(
"src/artifact_panel_state.js",
20_001.0,
"export function planArtifactPanelReset(state) { return state; }\n",
1,
1,
),
],
vec![],
2,
);
assert_eq!(merged[0].chunk.file_path, "src/artifact_panel_state.js");
}
/// A UI file is demoted for a query without UI terms and left untouched (multiplier 1.0)
/// when the query contains UI terms.
#[test]
fn intent_path_multiplier_penalizes_ui_files_only_for_non_ui_queries() {
assert!(
intent_path_multiplier(
"payment retry backoff",
"src/components/RetryButton.tsx",
"RetryButton.tsx"
) < 1.0
);
assert_eq!(
intent_path_multiplier(
"payment retry button ui",
"src/components/RetryButton.tsx",
"RetryButton.tsx"
),
1.0
);
}
}