use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use rayon::prelude::*;
use serde::Deserialize;
use crate::commands::callgraph_store_adapter::callers_result;
use crate::commands::symbol_render::{
build_container_outline, might_have_container_members, render_symbol_within_budget,
BudgetedSymbolRenderStatus,
};
use crate::context::{AppContext, SemanticIndexStatus};
use crate::grep_executor::{self, GrepParams};
use crate::inspect::job::{is_test_file, is_test_support_file};
use crate::pattern_compile::{self, CompileOpts, CompileResult};
use crate::protocol::{RawRequest, Response};
use crate::query_shape::{self, QueryKind, QueryShape};
use crate::search_index::{
sort_grep_matches_by_mtime_desc, GrepMatch, GrepResult, IndexStatus, SearchIndex,
};
use crate::semantic_index::{is_onnx_runtime_unavailable, EmbeddingModel, SemanticResult};
use crate::symbols::{Range, Symbol, SymbolKind};
const DEFAULT_TOP_K: usize = 10;
const MAX_TOP_K: usize = 100;
const HYBRID_LEXICAL_BOOST: f32 = 1.1;
const LEXICAL_ONLY_SCORE_CEILING: f32 = 0.25;
const LEXICAL_ENUMERATION_LIMIT: usize = 50;
const GENERATED_DIRECTORY_DENSITY_NUMERATOR: usize = 3;
const GENERATED_DIRECTORY_DENSITY_DENOMINATOR: usize = 5;
const SEMANTIC_OVERFETCH_MULTIPLIER: usize = 3;
const SEMANTIC_OVERFETCH_FLOOR: usize = 10;
const DEGRADED_GREP_FILE_LIMIT: usize = 1_000;
const DEGRADED_GREP_RESULT_LIMIT: usize = 100;
const RANK0_FULL_SNIPPET_MAX_LINES: usize = 250;
const RANK0_FULL_SYMBOL_NOTICE: &str =
"full symbol shown as-is on disk; no re-read or zoom needed before editing";
#[derive(Debug, Clone)]
pub struct HybridResult {
pub file: PathBuf,
pub name: String,
pub kind: SymbolKind,
pub start_line: u32,
pub end_line: u32,
pub exported: bool,
pub score: f32,
pub source: &'static str,
pub semantic_score: Option<f32>,
pub lexical_score: Option<f32>,
pub hybrid_boosted: bool,
pub(crate) cap_protected: bool,
pub(crate) lexical_generated_artifact: bool,
pub snippet: String,
}
#[derive(Debug, Clone, Copy, Default, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum SearchHint {
Regex,
Literal,
Semantic,
#[default]
Auto,
}
#[derive(Debug, Deserialize)]
struct SemanticSearchParams {
query: String,
#[serde(default = "default_top_k", alias = "topK")]
top_k: usize,
#[serde(default)]
hint: SearchHint,
#[serde(default, alias = "includeTests")]
include_tests: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchMode {
Regex,
Literal,
Semantic,
Hybrid,
}
#[derive(Debug, Clone)]
struct LexicalCollection {
files: Vec<(PathBuf, f32)>,
ready: bool,
engine_capped: bool,
}
#[derive(Debug, Clone)]
struct LexicalCandidate {
file: PathBuf,
score: f32,
generated_artifact: bool,
ordinal: usize,
}
struct GeneratedArtifactCache<'a> {
project_root: &'a Path,
directory_cache: HashMap<PathBuf, bool>,
}
#[derive(Debug, Clone)]
struct DegradedGrepFallbackResult {
grep: GrepResult,
file_cap_reached: bool,
file_limit: usize,
candidate_files: usize,
}
pub fn handle_semantic_search(req: &RawRequest, ctx: &AppContext) -> Response {
let mut params = match serde_json::from_value::<SemanticSearchParams>(req.params.clone()) {
Ok(params) => params,
Err(error) => {
return Response::error(
&req.id,
"invalid_request",
format!("semantic_search: invalid params: {error}"),
);
}
};
if params.query.trim().is_empty() {
return Response::error(&req.id, "invalid_request", "query must be non-empty");
}
params.query = strip_surrounding_quotes(params.query);
if params.query.trim().is_empty() {
return Response::error(&req.id, "invalid_request", "query must be non-empty");
}
let top_k = params.top_k.clamp(1, MAX_TOP_K);
let project_root = grep_executor::project_root(ctx);
let shape = query_shape::classify(¶ms.query);
let semantic_status_snapshot = ctx
.semantic_index_status()
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner)
.clone();
let semantic_status = semantic_status_label(&semantic_status_snapshot);
let mut warnings = Vec::new();
let lexical_ready = search_index_ready(ctx);
let regex_explicit = params.hint == SearchHint::Regex;
let mode = choose_mode(
params.hint,
¶ms.query,
&shape,
lexical_ready,
&mut warnings,
);
match mode {
SearchMode::Regex | SearchMode::Literal => handle_grep_search(
req,
ctx,
¶ms.query,
top_k,
&shape,
mode,
regex_explicit,
semantic_status,
warnings,
&project_root,
params.include_tests,
),
SearchMode::Semantic | SearchMode::Hybrid => handle_semantic_or_hybrid_search(
req,
ctx,
params,
top_k,
shape,
mode,
semantic_status_snapshot,
semantic_status,
warnings,
&project_root,
),
}
}
fn default_top_k() -> usize {
DEFAULT_TOP_K
}
fn semantic_candidate_limit(top_k: usize) -> usize {
top_k
.saturating_mul(SEMANTIC_OVERFETCH_MULTIPLIER)
.clamp(SEMANTIC_OVERFETCH_FLOOR, MAX_TOP_K)
}
fn grep_candidate_limit(top_k: usize, include_tests: bool) -> usize {
if include_tests {
return top_k;
}
DEGRADED_GREP_RESULT_LIMIT.max(top_k)
}
fn project_relative_path<'a>(path: &'a Path, project_root: &'a Path) -> &'a Path {
path.strip_prefix(project_root).unwrap_or(path)
}
fn path_is_test_support_file(path: &Path, project_root: &Path) -> bool {
let relative = project_relative_path(path, project_root);
is_test_support_file(relative.to_string_lossy().as_ref())
}
fn path_is_hidden_test_file(path: &Path, project_root: &Path) -> bool {
let relative = project_relative_path(path, project_root);
let rel = relative.to_string_lossy();
is_test_support_file(rel.as_ref()) || is_test_file(rel.as_ref())
}
fn path_allowed_by_include_tests(path: &Path, project_root: &Path, include_tests: bool) -> bool {
include_tests || !path_is_hidden_test_file(path, project_root)
}
fn path_has_unambiguous_generated_artifact_type(path: &Path) -> bool {
let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
return false;
};
let file_name = file_name.to_ascii_lowercase();
if file_name.ends_with(".min.js") || file_name.ends_with(".min.css") {
return true;
}
matches!(
path.extension()
.and_then(|extension| extension.to_str())
.map(str::to_ascii_lowercase)
.as_deref(),
Some("html" | "htm" | "css" | "map")
)
}
fn path_has_ambiguous_generated_artifact_type(path: &Path) -> bool {
matches!(
path.extension()
.and_then(|extension| extension.to_str())
.map(str::to_ascii_lowercase)
.as_deref(),
Some("json" | "svg" | "docset")
)
}
fn directory_is_generated_artifact_dominated(dir: &Path) -> bool {
let Ok(entries) = fs::read_dir(dir) else {
return false;
};
let mut file_count = 0usize;
let mut generated_count = 0usize;
for entry in entries.flatten() {
let Ok(file_type) = entry.file_type() else {
continue;
};
if !file_type.is_file() {
continue;
}
file_count += 1;
if path_has_unambiguous_generated_artifact_type(&entry.path()) {
generated_count += 1;
}
}
file_count > 0
&& generated_count * GENERATED_DIRECTORY_DENSITY_DENOMINATOR
>= file_count * GENERATED_DIRECTORY_DENSITY_NUMERATOR
}
impl<'a> GeneratedArtifactCache<'a> {
fn new(project_root: &'a Path) -> Self {
Self {
project_root,
directory_cache: HashMap::new(),
}
}
fn is_generated_artifact(&mut self, path: &Path) -> bool {
if path_has_unambiguous_generated_artifact_type(path) {
return true;
}
if !path_has_ambiguous_generated_artifact_type(path) {
return false;
}
let path_is_inside_project = path.starts_with(self.project_root);
let mut current = path.parent();
while let Some(dir) = current {
if path_is_inside_project && !dir.starts_with(self.project_root) {
break;
}
if self.directory_is_generated_artifact_dominated(dir) {
return true;
}
if path_is_inside_project && dir == self.project_root {
break;
}
current = dir.parent();
}
false
}
fn directory_is_generated_artifact_dominated(&mut self, dir: &Path) -> bool {
if let Some(cached) = self.directory_cache.get(dir) {
return *cached;
}
let dominated = directory_is_generated_artifact_dominated(dir);
self.directory_cache.insert(dir.to_path_buf(), dominated);
dominated
}
}
fn lexical_candidates_with_generated_artifact_rank(
lexical_files: Vec<(PathBuf, f32)>,
project_root: &Path,
) -> Vec<LexicalCandidate> {
let mut generated_cache = GeneratedArtifactCache::new(project_root);
let mut candidates = lexical_files
.into_iter()
.enumerate()
.map(|(ordinal, (file, score))| {
let generated_artifact = generated_cache.is_generated_artifact(&file);
LexicalCandidate {
file,
score,
generated_artifact,
ordinal,
}
})
.collect::<Vec<_>>();
candidates.sort_by(|a, b| {
a.generated_artifact
.cmp(&b.generated_artifact)
.then_with(|| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
})
.then_with(|| a.ordinal.cmp(&b.ordinal))
});
candidates
}
fn grep_visible_file_count(matches: &[GrepMatch]) -> usize {
matches
.iter()
.map(|grep_match| grep_match.file.clone())
.collect::<HashSet<_>>()
.len()
}
fn filter_grep_result_for_tests(
mut result: GrepResult,
include_tests: bool,
project_root: &Path,
top_k: usize,
) -> GrepResult {
if include_tests {
return result;
}
let mut visible = result
.matches
.into_iter()
.filter(|grep_match| !path_is_hidden_test_file(&grep_match.file, project_root))
.collect::<Vec<_>>();
let visible_total = visible.len();
let truncated_by_filter = visible.len() > top_k;
if truncated_by_filter {
visible.truncate(top_k);
}
result.matches = visible;
result.total_matches = visible_total;
result.files_with_matches = grep_visible_file_count(&result.matches);
result.truncated |= truncated_by_filter;
result
}
fn choose_mode(
hint: SearchHint,
query: &str,
shape: &QueryShape,
lexical_ready: bool,
warnings: &mut Vec<String>,
) -> SearchMode {
match hint {
SearchHint::Regex => {
if shape.kind == QueryKind::NaturalLanguage {
warnings.push(
"hint:'regex' was provided for a natural-language-looking query; interpreting it as regex.".to_string(),
);
}
SearchMode::Regex
}
SearchHint::Literal => {
if literal_tokens_all_short(query) {
warnings.push(
"Literal query with tokens shorter than 3 chars requires per-file scan; latency may be slow on large repos.".to_string(),
);
}
SearchMode::Literal
}
SearchHint::Semantic => {
if shape.kind == QueryKind::Regex {
warnings.push(
"hint:'semantic' was provided for a regex-looking query; skipping lexical/regex matching.".to_string(),
);
}
SearchMode::Semantic
}
SearchHint::Auto => {
if shape.kind == QueryKind::Regex {
return SearchMode::Regex;
}
if shape.kind != QueryKind::NaturalLanguage && extracted_tokens_all_short(query, shape)
{
warnings.push(
"Auto mode is using literal full-file scan for all-short exact tokens because the trigram index cannot rank tokens shorter than 3 chars.".to_string(),
);
return SearchMode::Literal;
}
if shape.kind == QueryKind::NaturalLanguage {
let word_count = query.split_whitespace().count();
if lexical_ready && word_count <= 2 {
return SearchMode::Hybrid;
}
return SearchMode::Semantic;
}
if lexical_ready {
SearchMode::Hybrid
} else {
warnings.push(
"Lexical trigram index is unavailable; using semantic search only.".to_string(),
);
SearchMode::Semantic
}
}
}
}
fn handle_grep_search(
req: &RawRequest,
ctx: &AppContext,
query: &str,
top_k: usize,
shape: &QueryShape,
mode: SearchMode,
regex_explicit: bool,
semantic_status: &'static str,
mut warnings: Vec<String>,
project_root: &Path,
include_tests: bool,
) -> Response {
let auto_regex = mode == SearchMode::Regex && !regex_explicit;
let mut effective_mode = mode;
let compile_literal_fallback = || -> Result<_, Response> {
match pattern_compile::compile(
query,
CompileOpts {
literal: true,
..CompileOpts::default()
},
) {
CompileResult::Ok(compiled) => Ok(compiled),
CompileResult::InvalidPattern { message, .. } => Err(Response::error_with_data(
&req.id,
"invalid_pattern",
message,
serde_json::json!({"pattern": query}),
)),
CompileResult::UnsupportedSyntax { feature, .. } => Err(Response::error_with_data(
&req.id,
"unsupported_pattern",
format!(
"Pattern uses regex syntax not supported by AFT's engine: {feature}. Use hint:'literal' or rewrite without {feature}."
),
serde_json::json!({"pattern": query, "feature": feature}),
)),
}
};
let compiled = match pattern_compile::compile(
query,
CompileOpts {
literal: mode == SearchMode::Literal,
..CompileOpts::default()
},
) {
CompileResult::Ok(compiled) => compiled,
CompileResult::InvalidPattern { message, .. } => {
if auto_regex {
warnings.push(auto_regex_literal_fallback_warning(
short_regex_compile_reason(&message),
));
effective_mode = SearchMode::Literal;
match compile_literal_fallback() {
Ok(compiled) => compiled,
Err(response) => return response,
}
} else {
return Response::error_with_data(
&req.id,
"invalid_pattern",
message,
serde_json::json!({"pattern": query}),
);
}
}
CompileResult::UnsupportedSyntax { feature, .. } => {
if auto_regex {
warnings.push(auto_regex_literal_fallback_warning(format!(
"{feature} is not supported"
)));
effective_mode = SearchMode::Literal;
match compile_literal_fallback() {
Ok(compiled) => compiled,
Err(response) => return response,
}
} else {
return Response::error_with_data(
&req.id,
"unsupported_pattern",
format!(
"Pattern uses regex syntax not supported by AFT's engine: {feature}. Use hint:'literal' or rewrite without {feature}."
),
serde_json::json!({"pattern": query, "feature": feature}),
);
}
}
};
let literal = effective_mode == SearchMode::Literal;
let grep_limit = grep_candidate_limit(top_k, include_tests);
let scope = match grep_executor::resolve_grep_scope(ctx, None, grep_limit, &req.id) {
Ok(scope) => scope,
Err(response) => return response,
};
let params = GrepParams {
include: Vec::new(),
exclude: Vec::new(),
max_results: grep_limit,
};
let mut result = grep_executor::execute(ctx, &compiled, &scope, ¶ms);
if result.fully_degraded {
warnings.push(degraded_warning(ctx));
}
result = filter_grep_result_for_tests(result, include_tests, project_root, top_k);
let result_source = if literal { "literal" } else { "regex" };
let result_values = result
.matches
.iter()
.map(|grep_match| grep_match_to_json(grep_match, result_source))
.collect::<Vec<_>>();
let interpreted_as = interpreted_as_label(effective_mode);
let text = format_grep_search_text(&result, project_root, interpreted_as);
search_response(
req,
SearchResponseParts {
query,
interpreted_as,
query_kind: query_kind_label(shape.kind),
semantic_status,
status: "ready",
complete: true,
text,
results: result_values,
more_available: result.truncated || result.total_matches > result.matches.len(),
engine_capped: result.engine_capped,
fully_degraded: result.fully_degraded,
warnings,
extras: serde_json::Map::new(),
},
)
}
fn short_regex_compile_reason(message: &str) -> Cow<'_, str> {
let trimmed = message.trim();
let reason = trimmed
.lines()
.rev()
.map(str::trim)
.find(|line| !line.is_empty() && !line.chars().all(|ch| ch == '^'))
.unwrap_or(trimmed);
Cow::Borrowed(
reason
.strip_prefix("error: ")
.or_else(|| reason.strip_prefix("invalid regex: "))
.unwrap_or(reason),
)
}
fn auto_regex_literal_fallback_warning(reason: impl AsRef<str>) -> String {
format!(
"Query looked like a regex but failed to compile ({}); searched literally instead. Pass hint:\"regex\" to force regex.",
reason.as_ref()
)
}
fn handle_semantic_or_hybrid_search(
req: &RawRequest,
ctx: &AppContext,
params: SemanticSearchParams,
top_k: usize,
shape: QueryShape,
mode: SearchMode,
status: SemanticIndexStatus,
semantic_status: &'static str,
mut warnings: Vec<String>,
project_root: &Path,
) -> Response {
let lexical = if mode == SearchMode::Hybrid {
collect_lexical_files(
ctx,
¶ms.query,
&shape,
params.include_tests,
project_root,
)
} else {
LexicalCollection {
files: Vec::new(),
ready: search_index_ready(ctx),
engine_capped: false,
}
};
match status {
SemanticIndexStatus::Disabled => {
return semantic_unavailable_or_fallback_response(
req,
ctx,
¶ms,
mode,
&shape,
"disabled",
"disabled",
"Semantic search is not enabled.".to_string(),
lexical,
warnings,
project_root,
top_k,
);
}
SemanticIndexStatus::Failed(error) => {
return semantic_unavailable_or_fallback_response(
req,
ctx,
¶ms,
mode,
&shape,
"unavailable",
"unavailable",
format!("Semantic search unavailable: {error}"),
lexical,
warnings,
project_root,
top_k,
);
}
SemanticIndexStatus::Building {
stage,
files,
entries_done,
entries_total,
} => {
let mut detail = format!("Semantic index is still building (stage: {}).", stage);
if let Some(files) = files {
detail.push_str(&format!(" files: {}", files));
}
if let Some(entries_done) = entries_done {
detail.push_str(&format!(" entries done: {}", entries_done));
}
if let Some(entries_total) = entries_total {
detail.push_str(&format!(" / {}", entries_total));
}
if natural_language_degraded_fallback_available(params.hint, mode, &shape) {
return semantic_unavailable_grep_fallback_response(
req,
ctx,
¶ms,
&shape,
"building",
detail,
warnings,
project_root,
top_k,
);
}
let lexical_count = lexical.files.len();
let lexical_engine_capped = lexical.engine_capped;
let results = fuse_hybrid_results(
Vec::new(),
lexical.files,
&shape,
top_k,
params.include_tests,
project_root,
);
let result_values = results.iter().map(result_to_json).collect::<Vec<_>>();
let note = building_lexical_note(lexical.ready);
let mut extras = serde_json::Map::new();
extras.insert("stage".to_string(), serde_json::json!(stage));
extras.insert("files".to_string(), serde_json::json!(files));
extras.insert("entries_done".to_string(), serde_json::json!(entries_done));
extras.insert(
"entries_total".to_string(),
serde_json::json!(entries_total),
);
extras.insert("note".to_string(), serde_json::json!(note));
extras.insert("semantic_rebuilding".to_string(), serde_json::json!(true));
extras.insert(
"lexical_only_fallback".to_string(),
serde_json::json!(lexical.ready),
);
return search_response(
req,
SearchResponseParts {
query: ¶ms.query,
interpreted_as: fallback_executed_label(mode, lexical.ready),
query_kind: query_kind_label(shape.kind),
semantic_status: "building",
status: "building",
complete: false,
text: format_building_lexical_text(
&detail,
&results,
project_root,
lexical.ready,
),
results: result_values,
more_available: lexical_count > top_k || lexical_engine_capped,
engine_capped: lexical_engine_capped,
fully_degraded: false,
warnings,
extras,
},
);
}
SemanticIndexStatus::Ready { refreshing, .. } => {
if !refreshing.is_empty() {
warnings.push(format!(
"{} file(s) refreshing; results for those files may be temporarily missing",
refreshing.len()
));
}
}
}
if !semantic_index_loaded(ctx) {
return semantic_unavailable_or_fallback_response(
req,
ctx,
¶ms,
mode,
&shape,
"unavailable",
"not_ready",
"Semantic index is not ready yet.".to_string(),
lexical,
warnings,
project_root,
top_k,
);
}
let query_vector = match embed_query(¶ms.query, ctx) {
Ok(query_vector) => query_vector,
Err(error) => {
if params.hint == SearchHint::Semantic
|| !semantic_degraded_fallback_available(¶ms, mode, &shape, &lexical)
{
return semantic_error_response(&req.id, &error);
}
return semantic_unavailable_or_fallback_response(
req,
ctx,
¶ms,
mode,
&shape,
"unavailable",
"unavailable",
format!("Semantic search unavailable: {error}"),
lexical,
warnings,
project_root,
top_k,
);
}
};
let semantic_limit = if params.include_tests {
semantic_candidate_limit(top_k)
} else {
MAX_TOP_K
};
let semantic_fetch_limit = semantic_limit.saturating_add(1);
let mut semantic_results = {
let semantic_index = ctx
.semantic_index()
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner);
semantic_index
.as_ref()
.map(|index| index.search(&query_vector, semantic_fetch_limit))
.unwrap_or_default()
};
let semantic_more_available = semantic_results.len() > semantic_limit;
if semantic_more_available {
semantic_results.truncate(semantic_limit);
}
rerank_semantic_candidates(&mut semantic_results, &shape, ¶ms.query);
let mut results = fuse_hybrid_results(
semantic_results,
lexical.files,
&shape,
top_k.saturating_add(1),
params.include_tests,
project_root,
);
let fused_more_available = results.len() > top_k;
if fused_more_available {
results.truncate(top_k);
}
let more_available = fused_more_available || semantic_more_available || lexical.engine_capped;
let snippets_incomplete =
enrich_snippets_from_source_with_context(&mut results, project_root, Some(ctx));
search_response(
req,
SearchResponseParts {
query: ¶ms.query,
interpreted_as: interpreted_as_label(mode),
query_kind: query_kind_label(shape.kind),
semantic_status,
status: "ready",
complete: true,
text: format_semantic_text(
&results,
project_root,
more_available,
snippets_incomplete,
Some(ctx),
),
results: results.iter().map(result_to_json).collect::<Vec<_>>(),
more_available,
engine_capped: lexical.engine_capped,
fully_degraded: false,
warnings,
extras: serde_json::Map::new(),
},
)
}
struct SearchResponseParts<'a> {
query: &'a str,
interpreted_as: &'static str,
query_kind: &'static str,
semantic_status: &'static str,
status: &'static str,
complete: bool,
text: String,
results: Vec<serde_json::Value>,
more_available: bool,
engine_capped: bool,
fully_degraded: bool,
warnings: Vec<String>,
extras: serde_json::Map<String, serde_json::Value>,
}
impl<'a> SearchResponseParts<'a> {
fn result_count(&self) -> usize {
self.results.len()
}
}
fn search_response(req: &RawRequest, parts: SearchResponseParts<'_>) -> Response {
let mut object = serde_json::Map::new();
object.insert("status".to_string(), serde_json::json!(parts.status));
object.insert("complete".to_string(), serde_json::json!(parts.complete));
object.insert("text".to_string(), serde_json::json!(parts.text));
object.insert("query".to_string(), serde_json::json!(parts.query));
object.insert(
"interpreted_as".to_string(),
serde_json::json!(parts.interpreted_as),
);
object.insert(
"query_kind".to_string(),
serde_json::json!(parts.query_kind),
);
object.insert(
"result_count".to_string(),
serde_json::json!(parts.result_count()),
);
object.insert(
"results".to_string(),
serde_json::Value::Array(parts.results),
);
object.insert(
"more_available".to_string(),
serde_json::json!(parts.more_available),
);
object.insert(
"engine_capped".to_string(),
serde_json::json!(parts.engine_capped),
);
object.insert(
"fully_degraded".to_string(),
serde_json::json!(parts.fully_degraded),
);
object.insert(
"semantic_status".to_string(),
serde_json::json!(parts.semantic_status),
);
if !parts.warnings.is_empty() {
object.insert("warnings".to_string(), serde_json::json!(parts.warnings));
}
for (key, value) in parts.extras {
object.insert(key, value);
}
Response::success(&req.id, serde_json::Value::Object(object))
}
fn semantic_unavailable_or_fallback_response(
req: &RawRequest,
ctx: &AppContext,
params: &SemanticSearchParams,
mode: SearchMode,
shape: &QueryShape,
semantic_status: &'static str,
unavailable_status: &'static str,
detail: String,
lexical: LexicalCollection,
mut warnings: Vec<String>,
project_root: &Path,
top_k: usize,
) -> Response {
if params.hint == SearchHint::Semantic {
return semantic_unavailable_response(&req.id, detail);
}
let lexical_ready = mode == SearchMode::Hybrid && lexical.ready;
if lexical_ready {
let lexical_count = lexical.files.len();
let lexical_engine_capped = lexical.engine_capped;
let results = fuse_hybrid_results(
Vec::new(),
lexical.files,
shape,
top_k,
params.include_tests,
project_root,
);
let result_values = results.iter().map(result_to_json).collect::<Vec<_>>();
warnings.push(
"Semantic search unavailable; returning lexical-only fallback results.".to_string(),
);
return search_response(
req,
SearchResponseParts {
query: ¶ms.query,
interpreted_as: fallback_executed_label(mode, true),
query_kind: query_kind_label(shape.kind),
semantic_status,
status: "ready",
complete: false,
text: format_lexical_unavailable_text(&detail, &results, project_root),
results: result_values,
more_available: lexical_count > top_k || lexical_engine_capped,
engine_capped: lexical_engine_capped,
fully_degraded: false,
warnings,
extras: semantic_unavailable_extras(true),
},
);
}
if semantic_degraded_fallback_available(params, mode, shape, &lexical) {
return semantic_unavailable_grep_fallback_response(
req,
ctx,
params,
shape,
semantic_status,
detail,
warnings,
project_root,
top_k,
);
}
let mut extras = semantic_unavailable_extras(false);
if mode == SearchMode::Hybrid {
extras.insert("lexical_unavailable".to_string(), serde_json::json!(true));
}
search_response(
req,
SearchResponseParts {
query: ¶ms.query,
interpreted_as: interpreted_as_label(mode),
query_kind: query_kind_label(shape.kind),
semantic_status,
status: unavailable_status,
complete: false,
text: detail,
results: Vec::new(),
more_available: false,
engine_capped: lexical.engine_capped,
fully_degraded: false,
warnings,
extras,
},
)
}
fn semantic_unavailable_response(request_id: &str, detail: String) -> Response {
Response::error(request_id, "semantic_unavailable", detail)
}
fn semantic_unavailable_extras(
lexical_only_fallback: bool,
) -> serde_json::Map<String, serde_json::Value> {
let mut extras = serde_json::Map::new();
extras.insert("semantic_unavailable".to_string(), serde_json::json!(true));
extras.insert(
"lexical_only_fallback".to_string(),
serde_json::json!(lexical_only_fallback),
);
extras
}
fn semantic_degraded_fallback_available(
params: &SemanticSearchParams,
mode: SearchMode,
shape: &QueryShape,
lexical: &LexicalCollection,
) -> bool {
if natural_language_degraded_fallback_available(params.hint, mode, shape) {
return true;
}
params.hint != SearchHint::Semantic
&& mode == SearchMode::Semantic
&& !lexical.ready
&& shape.weights.should_use_lexical
}
fn natural_language_degraded_fallback_available(
hint: SearchHint,
mode: SearchMode,
shape: &QueryShape,
) -> bool {
hint != SearchHint::Semantic
&& mode == SearchMode::Semantic
&& shape.kind == QueryKind::NaturalLanguage
}
fn semantic_unavailable_grep_fallback_response(
req: &RawRequest,
ctx: &AppContext,
params: &SemanticSearchParams,
shape: &QueryShape,
semantic_status: &'static str,
detail: String,
mut warnings: Vec<String>,
project_root: &Path,
top_k: usize,
) -> Response {
let fallback = match execute_degraded_grep_fallback(
¶ms.query,
project_root,
top_k,
params.include_tests,
&req.id,
) {
Ok(result) => result,
Err(response) => return response,
};
let result = &fallback.grep;
if result.fully_degraded {
warnings.push(degraded_warning(ctx));
}
if fallback.file_cap_reached {
warnings.push(format!(
"Degraded grep reached its {}-file scan cap; additional files were not scanned.",
fallback.file_limit
));
}
warnings
.push("Semantic search unavailable; returning lexical-only fallback results.".to_string());
let result_values = result
.matches
.iter()
.map(|grep_match| grep_match_to_json(grep_match, "literal"))
.collect::<Vec<_>>();
let more_available = result.truncated
|| result.total_matches > result.matches.len()
|| fallback.file_cap_reached;
let mut extras = semantic_unavailable_extras(true);
if fallback.file_cap_reached {
extras.insert(
"degraded_grep_walk_truncated".to_string(),
serde_json::json!(true),
);
extras.insert(
"degraded_grep_file_limit".to_string(),
serde_json::json!(fallback.file_limit),
);
extras.insert(
"degraded_grep_candidate_files".to_string(),
serde_json::json!(fallback.candidate_files),
);
}
search_response(
req,
SearchResponseParts {
query: ¶ms.query,
interpreted_as: "literal",
query_kind: query_kind_label(shape.kind),
semantic_status,
status: "ready",
complete: false,
text: format_grep_lexical_unavailable_text(&detail, result, project_root),
results: result_values,
more_available,
engine_capped: result.engine_capped,
fully_degraded: result.fully_degraded,
warnings,
extras,
},
)
}
fn execute_degraded_grep_fallback(
query: &str,
project_root: &Path,
top_k: usize,
include_tests: bool,
request_id: &str,
) -> Result<DegradedGrepFallbackResult, Response> {
let compiled = match pattern_compile::compile(
query,
CompileOpts {
literal: true,
..CompileOpts::default()
},
) {
CompileResult::Ok(compiled) => compiled,
CompileResult::InvalidPattern { message, .. } => {
return Err(Response::error_with_data(
request_id,
"invalid_pattern",
message,
serde_json::json!({"pattern": query}),
));
}
CompileResult::UnsupportedSyntax { feature, .. } => {
return Err(Response::error_with_data(
request_id,
"unsupported_pattern",
format!(
"Pattern uses regex syntax not supported by AFT's engine: {feature}. Use hint:'literal' or rewrite without {feature}."
),
serde_json::json!({"pattern": query, "feature": feature}),
));
}
};
let max_results = top_k.clamp(1, DEGRADED_GREP_RESULT_LIMIT);
let (files, file_cap_reached) = collect_degraded_grep_files(project_root, include_tests);
let candidate_files = files.len();
let mut matches = Vec::new();
let mut total_matches = 0usize;
let mut files_searched = 0usize;
let mut files_with_matches = 0usize;
let mut truncated = false;
let mut engine_capped = file_cap_reached;
let mut readable_files = files
.par_iter()
.enumerate()
.filter_map(|(index, file)| {
crate::search_index::read_searchable_text(file)
.map(|content| (index, file.clone(), content))
})
.collect::<Vec<_>>();
readable_files.sort_by_key(|(index, _, _)| *index);
for (_, file, content) in readable_files {
if truncated {
engine_capped = true;
break;
}
files_searched += 1;
if search_degraded_grep_file(
&file,
&content,
&compiled,
max_results,
&mut total_matches,
&mut truncated,
&mut matches,
) {
files_with_matches += 1;
}
}
if truncated {
engine_capped = true;
}
sort_grep_matches_by_mtime_desc(&mut matches, project_root);
Ok(DegradedGrepFallbackResult {
grep: GrepResult {
matches,
total_matches,
files_searched,
files_with_matches,
index_status: IndexStatus::Fallback,
truncated,
fully_degraded: true,
engine_capped,
walk_truncated: false,
},
file_cap_reached,
file_limit: DEGRADED_GREP_FILE_LIMIT,
candidate_files,
})
}
fn collect_degraded_grep_files(project_root: &Path, include_tests: bool) -> (Vec<PathBuf>, bool) {
let walker = ignore::WalkBuilder::new(project_root)
.hidden(false)
.git_ignore(true)
.git_global(true)
.git_exclude(true)
.add_custom_ignore_filename(".aftignore")
.filter_entry(|entry| {
let name = entry.file_name().to_string_lossy();
if entry
.file_type()
.map_or(false, |file_type| file_type.is_dir())
{
return !matches!(
name.as_ref(),
"node_modules"
| "target"
| "venv"
| ".venv"
| ".git"
| "__pycache__"
| ".tox"
| "dist"
| "build"
);
}
true
})
.build();
let mut files = Vec::new();
for entry in walker.filter_map(|entry| entry.ok()) {
if !entry
.file_type()
.map_or(false, |file_type| file_type.is_file())
{
continue;
}
let path = entry.into_path();
if !include_tests && path_is_hidden_test_file(&path, project_root) {
continue;
}
if files.len() >= DEGRADED_GREP_FILE_LIMIT {
return (files, true);
}
files.push(path);
}
(files, false)
}
fn search_degraded_grep_file(
file: &Path,
content: &str,
compiled: &pattern_compile::CompiledPattern,
max_results: usize,
total_matches: &mut usize,
truncated: &mut bool,
matches: &mut Vec<GrepMatch>,
) -> bool {
let line_starts = grep_executor::line_starts(content);
let mut seen_lines = HashSet::new();
let mut matched_this_file = false;
match compiled {
pattern_compile::CompiledPattern::Literal(literal) => {
let Some(needle) = std::str::from_utf8(&literal.needle).ok() else {
return false;
};
let haystack = if literal.case_insensitive_ascii {
Cow::Owned(content.to_ascii_lowercase())
} else {
Cow::Borrowed(content)
};
for (offset, matched) in haystack.match_indices(needle) {
let match_text = content[offset..offset + matched.len()].to_string();
let (counted, should_continue) = record_degraded_grep_match(
file,
content,
&line_starts,
&mut seen_lines,
offset,
match_text,
max_results,
total_matches,
truncated,
matches,
);
matched_this_file |= counted;
if !should_continue {
break;
}
}
}
pattern_compile::CompiledPattern::Regex { compiled, .. } => {
for matched in compiled.find_iter(content.as_bytes()) {
let (counted, should_continue) = record_degraded_grep_match(
file,
content,
&line_starts,
&mut seen_lines,
matched.start(),
String::from_utf8_lossy(matched.as_bytes()).into_owned(),
max_results,
total_matches,
truncated,
matches,
);
matched_this_file |= counted;
if !should_continue {
break;
}
}
}
}
matched_this_file
}
fn record_degraded_grep_match(
file: &Path,
content: &str,
line_starts: &[usize],
seen_lines: &mut HashSet<u32>,
offset: usize,
match_text: String,
max_results: usize,
total_matches: &mut usize,
truncated: &mut bool,
matches: &mut Vec<GrepMatch>,
) -> (bool, bool) {
let (line, column, line_text) = grep_executor::line_details(content, line_starts, offset);
if !seen_lines.insert(line) {
return (false, true);
}
*total_matches += 1;
if matches.len() >= max_results {
*truncated = true;
return (true, false);
}
matches.push(GrepMatch {
file: file.to_path_buf(),
line,
column,
line_text,
match_text,
});
(true, true)
}
fn semantic_index_loaded(ctx: &AppContext) -> bool {
ctx.semantic_index()
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner)
.is_some()
}
fn collect_lexical_files(
ctx: &AppContext,
query: &str,
shape: &QueryShape,
include_tests: bool,
project_root: &Path,
) -> LexicalCollection {
let tokens = if shape.kind == QueryKind::NaturalLanguage {
query_shape::extract_short_nl_lexical_tokens(query)
} else {
query_shape::extract_tokens(query, shape)
};
let token_refs = tokens.iter().map(String::as_str).collect::<Vec<_>>();
let query_trigrams = SearchIndex::query_trigrams_from_tokens(&token_refs);
let snapshot = {
let search_index = ctx
.search_index()
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let Some(index) = search_index.as_ref().filter(|index| index.ready) else {
return LexicalCollection {
files: Vec::new(),
ready: false,
engine_capped: false,
};
};
index.snapshot()
};
let production_file_filter =
|path: &Path| path_allowed_by_include_tests(path, project_root, include_tests);
let filter = (!include_tests).then_some(&production_file_filter as &dyn Fn(&Path) -> bool);
let ranked =
snapshot.lexical_rank_with_stats(&query_trigrams, filter, LEXICAL_ENUMERATION_LIMIT);
LexicalCollection {
files: ranked.files,
ready: true,
engine_capped: ranked.engine_capped,
}
}
fn search_index_ready(ctx: &AppContext) -> bool {
ctx.search_index()
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner)
.as_ref()
.is_some_and(|index| index.ready)
}
fn embed_query(query: &str, ctx: &AppContext) -> Result<Vec<f32>, String> {
let mut model_ref = ctx.semantic_embedding_model().lock();
if model_ref.is_none() {
drop(model_ref);
let semantic_config = ctx.config().semantic.clone();
let constructed_model = EmbeddingModel::from_config_for_query(&semantic_config)?;
model_ref = ctx.semantic_embedding_model().lock();
if model_ref.is_none() {
*model_ref = Some(constructed_model);
} else {
drop(model_ref);
{
let _discarded_model = constructed_model;
}
model_ref = ctx.semantic_embedding_model().lock();
}
}
let model = model_ref
.as_mut()
.ok_or_else(|| "embedding model was not initialized".to_string())?;
let query_vector = model
.embed_query_cached(query)
.map_err(|error| format!("failed to embed query: {error}"))?;
drop(model_ref);
let index_dimension = {
let semantic_index = ctx
.semantic_index()
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner);
semantic_index
.as_ref()
.filter(|index| index.len() > 0)
.map(|index| index.dimension())
};
if let Some(index_dimension) = index_dimension {
if index_dimension != query_vector.len() {
return Err(format!(
"semantic embedding dimension mismatch: query backend returned {}, index expects {}. Rebuild the semantic index for the active backend/model.",
query_vector.len(),
index_dimension
));
}
}
Ok(query_vector)
}
fn rerank_semantic_candidates(results: &mut Vec<SemanticResult>, shape: &QueryShape, query: &str) {
let (tokens, allow_case_fold) = semantic_rerank_tokens(query, shape);
let type_concept = query_shape::is_type_concept_identifier_query(query, shape);
let apply_definition_priors = shape.kind == QueryKind::NaturalLanguage || type_concept;
let kind_prior_strength = semantic_kind_prior_strength(shape, apply_definition_priors);
for result in results.iter_mut() {
result.rank_score = result.score;
result.cap_protected = false;
result.rank_score *= semantic_kind_multiplier(&result.kind, kind_prior_strength);
if !tokens.is_empty()
&& is_definition_kind(&result.kind)
&& tokens
.iter()
.any(|token| token_matches_candidate_name(token, result, allow_case_fold))
{
result.rank_score *= EXACT_NAME_DEFINITION_BOOST;
if result.score >= P2_CAP_PROTECTED_COSINE_FLOOR {
result.cap_protected = true;
}
}
}
if apply_definition_priors {
apply_natural_language_diversity_cap(results);
}
}
#[derive(Clone, Copy)]
enum SemanticKindPriorStrength {
NaturalLanguage,
Mixed,
Inert,
}
fn semantic_kind_prior_strength(
shape: &QueryShape,
apply_definition_priors: bool,
) -> SemanticKindPriorStrength {
if apply_definition_priors {
SemanticKindPriorStrength::NaturalLanguage
} else if shape.kind == QueryKind::Mixed {
SemanticKindPriorStrength::Mixed
} else {
SemanticKindPriorStrength::Inert
}
}
fn semantic_rerank_tokens(query: &str, shape: &QueryShape) -> (Vec<String>, bool) {
match shape.kind {
QueryKind::Identifier => (query_shape::extract_tokens(query, shape), false),
QueryKind::Mixed => (query_shape::extract_tokens(query, shape), true),
QueryKind::NaturalLanguage => (query_shape::extract_explicit_code_tokens(query), true),
QueryKind::Path | QueryKind::ErrorCode | QueryKind::Regex => (Vec::new(), false),
}
}
fn semantic_kind_multiplier(kind: &SymbolKind, strength: SemanticKindPriorStrength) -> f32 {
match strength {
SemanticKindPriorStrength::NaturalLanguage => match kind {
SymbolKind::Function
| SymbolKind::Class
| SymbolKind::Method
| SymbolKind::Struct
| SymbolKind::Interface
| SymbolKind::Enum
| SymbolKind::TypeAlias => 1.08,
SymbolKind::Variable => 0.92,
SymbolKind::FileSummary => 0.80,
SymbolKind::Heading => 1.0,
},
SemanticKindPriorStrength::Mixed => match kind {
SymbolKind::Function
| SymbolKind::Class
| SymbolKind::Method
| SymbolKind::Struct
| SymbolKind::Interface
| SymbolKind::Enum
| SymbolKind::TypeAlias => 1.03,
SymbolKind::FileSummary => 0.90,
SymbolKind::Variable | SymbolKind::Heading => 1.0,
},
SemanticKindPriorStrength::Inert => 1.0,
}
}
fn is_definition_kind(kind: &SymbolKind) -> bool {
matches!(
kind,
SymbolKind::Function
| SymbolKind::Class
| SymbolKind::Method
| SymbolKind::Struct
| SymbolKind::Interface
| SymbolKind::Enum
| SymbolKind::TypeAlias
)
}
fn token_matches_candidate_name(
token: &str,
result: &SemanticResult,
allow_case_fold: bool,
) -> bool {
names_equal(token, &result.name, allow_case_fold)
|| result
.qualified_name
.as_deref()
.is_some_and(|qualified_name| names_equal(token, qualified_name, allow_case_fold))
}
fn names_equal(token: &str, name: &str, allow_case_fold: bool) -> bool {
token == name || (allow_case_fold && token.eq_ignore_ascii_case(name))
}
fn apply_natural_language_diversity_cap(results: &mut Vec<SemanticResult>) {
let mut cluster_counts: HashMap<(String, SymbolKind), usize> = HashMap::new();
results.retain(|result| {
let key = (
result
.qualified_name
.as_deref()
.unwrap_or(&result.name)
.to_string(),
result.kind.clone(),
);
let count = cluster_counts.entry(key).or_insert(0);
if *count < NATURAL_LANGUAGE_CLUSTER_CAP {
*count += 1;
true
} else {
false
}
});
}
fn sort_hybrid_results(results: &mut [HybridResult]) {
results.sort_by(|a, b| {
a.lexical_generated_artifact
.cmp(&b.lexical_generated_artifact)
.then_with(|| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
})
.then_with(|| a.file.cmp(&b.file))
.then_with(|| a.name.cmp(&b.name))
});
}
pub fn fuse_hybrid_results(
semantic: Vec<SemanticResult>,
lexical_files: Vec<(PathBuf, f32)>,
shape: &QueryShape,
top_k: usize,
include_tests: bool,
project_root: &Path,
) -> Vec<HybridResult> {
if top_k == 0 {
return Vec::new();
}
let semantic = semantic
.into_iter()
.filter(|result| path_allowed_by_include_tests(&result.file, project_root, include_tests))
.collect::<Vec<_>>();
let lexical_files = lexical_files
.into_iter()
.filter(|(file, _)| path_allowed_by_include_tests(file, project_root, include_tests))
.collect::<Vec<_>>();
let lexical_candidates =
lexical_candidates_with_generated_artifact_rank(lexical_files, project_root);
if lexical_candidates.is_empty() {
let mut results = semantic
.into_iter()
.map(|result| hybrid_from_semantic(result, None))
.collect::<Vec<_>>();
sort_hybrid_results(&mut results);
results.truncate(top_k);
return results;
}
if semantic.is_empty() {
return lexical_candidates
.into_iter()
.take(top_k)
.map(|candidate| lexical_only_result(candidate, shape))
.collect();
}
let lexical_top_files: HashMap<PathBuf, LexicalCandidate> = lexical_candidates
.iter()
.map(|candidate| (candidate.file.clone(), candidate.clone()))
.collect();
let mut results: Vec<HybridResult> = semantic
.into_iter()
.map(|result| {
let lexical_candidate = lexical_top_files.get(&result.file);
hybrid_from_semantic(result, lexical_candidate)
})
.collect();
let semantic_files: HashSet<PathBuf> =
results.iter().map(|result| result.file.clone()).collect();
for candidate in lexical_candidates {
if !semantic_files.contains(&candidate.file) {
results.push(lexical_only_result(candidate, shape));
}
}
sort_hybrid_results(&mut results);
let mut results = cap_per_file(results, 2);
sort_hybrid_results(&mut results);
results.truncate(top_k);
results
}
fn hybrid_from_semantic(
result: SemanticResult,
lexical_candidate: Option<&LexicalCandidate>,
) -> HybridResult {
let semantic_score = result.score;
let ranking_score = result.rank_score;
let cap_protected = result.cap_protected;
let lexical_score = lexical_candidate.map(|candidate| candidate.score);
let hybrid_boosted = lexical_candidate.is_some_and(|candidate| !candidate.generated_artifact);
let score = if hybrid_boosted {
ranking_score * HYBRID_LEXICAL_BOOST
} else {
ranking_score
};
HybridResult {
file: result.file,
name: result.name,
kind: result.kind,
start_line: result.start_line,
end_line: result.end_line,
exported: result.exported,
snippet: result.snippet,
score,
source: "semantic",
semantic_score: Some(semantic_score),
lexical_score,
hybrid_boosted,
cap_protected,
lexical_generated_artifact: false,
}
}
fn lexical_only_result(candidate: LexicalCandidate, shape: &QueryShape) -> HybridResult {
let score = if candidate.generated_artifact {
0.0
} else {
(candidate.score * shape_dependent_lexical_only_weight(shape))
.min(LEXICAL_ONLY_SCORE_CEILING)
};
HybridResult {
file: candidate.file,
name: String::new(),
kind: SymbolKind::FileSummary,
start_line: 0,
end_line: 0,
exported: false,
score,
source: "lexical",
semantic_score: None,
lexical_score: Some(candidate.score),
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: candidate.generated_artifact,
snippet: "[lexical match — use aft_zoom or read for context]".to_string(),
}
}
fn shape_dependent_lexical_only_weight(shape: &QueryShape) -> f32 {
match shape.kind {
QueryKind::Identifier => 0.8,
QueryKind::Path | QueryKind::ErrorCode | QueryKind::Mixed => 0.5,
QueryKind::NaturalLanguage | QueryKind::Regex => 0.0,
}
}
fn cap_per_file(results: Vec<HybridResult>, cap: usize) -> Vec<HybridResult> {
let mut ordinary_counts: HashMap<PathBuf, usize> = HashMap::new();
let mut capped = Vec::new();
for result in results {
if result.cap_protected {
capped.push(result);
continue;
}
let count = ordinary_counts.entry(result.file.clone()).or_insert(0);
if *count < cap {
*count += 1;
capped.push(result);
}
}
capped
}
fn semantic_error_response(request_id: &str, error: &str) -> Response {
if is_onnx_runtime_unavailable(error) {
return Response::error(
request_id,
"semantic_search_unavailable",
format!("Semantic search unavailable: {error}"),
);
}
Response::error(
request_id,
"semantic_search_failed",
format!("semantic_search: {error}"),
)
}
fn format_lexical_unavailable_text(
detail: &str,
results: &[HybridResult],
project_root: &Path,
) -> String {
if results.is_empty() {
return format!(
"{detail}\nSemantic search unavailable; lexical-only fallback returned 0 result(s). [semantic: unavailable]"
);
}
format!(
"{detail}\nSemantic search unavailable; returning lexical-only fallback results.\n\n{}\n\nFound {} lexical fallback result(s). [semantic: unavailable]",
format_result_sections(results, project_root),
results.len()
)
}
fn format_grep_lexical_unavailable_text(
detail: &str,
result: &GrepResult,
project_root: &Path,
) -> String {
if result.matches.is_empty() {
return format!(
"{detail}\nSemantic search unavailable; lexical-only fallback returned 0 result(s). [semantic: unavailable]"
);
}
format!(
"{detail}\nSemantic search unavailable; returning lexical-only fallback results.\n\n{}\n\nFound {} lexical fallback result(s). [semantic: unavailable]",
crate::commands::grep::format_grep_text(result, project_root),
result.matches.len()
)
}
fn building_lexical_note(lexical_index_ready: bool) -> &'static str {
if lexical_index_ready {
"Semantic index is rebuilding; results are lexical-only fallback results from the trigram index."
} else {
"Semantic index is rebuilding; lexical fallback is unavailable because the trigram index is not ready."
}
}
fn format_building_lexical_text(
detail: &str,
results: &[HybridResult],
project_root: &Path,
lexical_index_ready: bool,
) -> String {
let note = building_lexical_note(lexical_index_ready);
if results.is_empty() {
return format!(
"{detail}\n{note}\nFound 0 lexical fallback result(s). [semantic: rebuilding]"
);
}
format!(
"{detail}\n{note}\n\n{}\n\nFound {} lexical fallback result(s). [semantic: rebuilding]",
format_result_sections(results, project_root),
results.len()
)
}
const WEAK_MATCH_COSINE_FLOOR: f32 = 0.35;
const P2_CAP_PROTECTED_COSINE_FLOOR: f32 = WEAK_MATCH_COSINE_FLOOR;
const EXACT_NAME_DEFINITION_BOOST: f32 = 1.20;
const NATURAL_LANGUAGE_CLUSTER_CAP: usize = 2;
const HIGH_CONFIDENCE_COSINE_FLOOR: f32 = 0.60;
fn results_are_low_confidence(results: &[HybridResult]) -> bool {
results
.first()
.and_then(|r| r.semantic_score)
.is_some_and(|cosine| cosine < WEAK_MATCH_COSINE_FLOOR)
}
fn format_semantic_text(
results: &[HybridResult],
project_root: &Path,
more_available: bool,
snippets_incomplete: bool,
ctx: Option<&AppContext>,
) -> String {
if results.is_empty() {
return "Found 0 results.".to_string();
}
let mut text = format_result_sections_with_context(results, project_root, ctx);
text.push_str(&format!("\n\nFound {} result(s).", results.len()));
if more_available {
text.push_str(" More results available; raise topK to see more.");
}
if results_are_low_confidence(results) {
text.push_str("\nTop match is weak — consider rephrasing or using grep for exact terms.");
}
if snippets_incomplete {
text.push_str("\nZoom any result for full source: aft_zoom <file> <symbol>.");
}
text
}
fn format_grep_search_text(
result: &GrepResult,
project_root: &Path,
interpreted_as: &str,
) -> String {
let base = crate::commands::grep::format_grep_text(result, project_root);
format!("{base}\n[interpreted_as: {interpreted_as}]")
}
fn snippet_line_budget(global_rank: usize) -> usize {
match global_rank {
0 => 20,
1 | 2 => 5,
_ => 0,
}
}
#[cfg(test)]
fn enrich_snippets_from_source(results: &mut [HybridResult], project_root: &Path) -> bool {
enrich_snippets_from_source_with_context(results, project_root, None)
}
fn enrich_snippets_from_source_with_context(
results: &mut [HybridResult],
project_root: &Path,
ctx: Option<&AppContext>,
) -> bool {
let mut file_lines: HashMap<PathBuf, Option<Vec<String>>> = HashMap::new();
let mut incomplete = false;
for (rank, result) in results.iter_mut().enumerate() {
if result.source == "lexical" || matches!(result.kind, SymbolKind::FileSummary) {
continue;
}
let budget = snippet_line_budget(rank);
if budget == 0 {
if result.end_line >= result.start_line {
incomplete = true;
}
result.snippet = String::new();
continue;
}
let lines = file_lines.entry(result.file.clone()).or_insert_with(|| {
std::fs::read_to_string(&result.file)
.ok()
.map(|content| content.lines().map(str::to_string).collect())
});
let Some(lines) = lines else {
result.snippet = String::new();
continue;
};
let start = (result.start_line as usize).min(lines.len());
let end = ((result.end_line as usize) + 1).min(lines.len());
if start >= end {
result.snippet = String::new();
continue;
}
if should_expand_rank0_snippet(rank, result, project_root) {
let rendered = render_rank0_symbol_snippet(result, lines, ctx);
match rendered.status {
BudgetedSymbolRenderStatus::Complete => {
result.snippet = append_rank0_full_symbol_notice(rendered.content);
continue;
}
BudgetedSymbolRenderStatus::Truncated | BudgetedSymbolRenderStatus::Menu => {
result.snippet = rendered.content;
incomplete = true;
continue;
}
}
}
let range_len = end - start;
let shown = range_len.min(budget);
let mut snippet = lines[start..start + shown].join("\n");
let remaining = range_len - shown;
if remaining > 0 {
snippet.push_str(&format!("\n+{remaining} more lines"));
incomplete = true;
}
result.snippet = snippet;
}
incomplete
}
fn render_rank0_symbol_snippet(
result: &HybridResult,
lines: &[String],
ctx: Option<&AppContext>,
) -> crate::commands::symbol_render::BudgetedSymbolRender {
let target = symbol_for_rank0_render(result, ctx).unwrap_or_else(|| symbol_from_result(result));
let outline = ctx.and_then(|ctx| {
if might_have_container_members(&target) {
build_container_outline(ctx, &result.file, &target).ok()
} else {
None
}
});
render_symbol_within_budget(
&target,
lines,
crate::parser::detect_language(&result.file),
outline.as_ref(),
RANK0_FULL_SNIPPET_MAX_LINES,
)
}
fn symbol_for_rank0_render(ctx_result: &HybridResult, ctx: Option<&AppContext>) -> Option<Symbol> {
let symbols = ctx?.provider().list_symbols(&ctx_result.file).ok()?;
symbols
.iter()
.find(|symbol| symbol_matches_result(symbol, ctx_result, true))
.cloned()
.or_else(|| {
symbols
.into_iter()
.find(|symbol| symbol_matches_result(symbol, ctx_result, false))
})
}
fn symbol_matches_result(symbol: &Symbol, result: &HybridResult, exact_range: bool) -> bool {
symbol.name == result.name
&& symbol.kind == result.kind
&& (!exact_range
|| (symbol.range.start_line == result.start_line
&& symbol.range.end_line == result.end_line))
}
fn symbol_from_result(result: &HybridResult) -> Symbol {
Symbol {
name: result.name.clone(),
kind: result.kind.clone(),
range: Range {
start_line: result.start_line,
start_col: 0,
end_line: result.end_line,
end_col: 0,
},
signature: None,
scope_chain: Vec::new(),
exported: result.exported,
parent: None,
}
}
fn append_rank0_full_symbol_notice(content: String) -> String {
if content.is_empty() {
RANK0_FULL_SYMBOL_NOTICE.to_string()
} else {
format!("{content}\n{RANK0_FULL_SYMBOL_NOTICE}")
}
}
fn should_expand_rank0_snippet(rank: usize, result: &HybridResult, project_root: &Path) -> bool {
rank == 0
&& result
.semantic_score
.is_some_and(|cosine| cosine >= HIGH_CONFIDENCE_COSINE_FLOOR)
&& !path_is_test_support_file(&result.file, project_root)
}
fn format_result_sections(results: &[HybridResult], project_root: &Path) -> String {
format_result_sections_with_context(results, project_root, None)
}
fn format_result_sections_with_context(
results: &[HybridResult],
project_root: &Path,
ctx: Option<&AppContext>,
) -> String {
let annotations = ctx
.map(|ctx| blast_radius_annotations(ctx, results))
.unwrap_or_else(|| vec![None; results.len()]);
let mut group_order: Vec<String> = Vec::new();
let mut groups: HashMap<String, Vec<(usize, &HybridResult)>> = HashMap::new();
for (index, result) in results.iter().enumerate() {
let display_path = result
.file
.strip_prefix(project_root)
.unwrap_or(&result.file)
.display()
.to_string();
if !groups.contains_key(&display_path) {
group_order.push(display_path.clone());
}
groups
.entry(display_path)
.or_default()
.push((index, result));
}
group_order
.iter()
.map(|file| {
let mut section = file.clone();
for (index, result) in &groups[file] {
if result.source == "lexical" {
section.push_str(" [lexical match]");
continue;
}
if matches!(result.kind, SymbolKind::FileSummary) {
section.push_str(&format!("\n {} [file summary]", result.name));
} else {
section.push_str(&format!(
"\n {} [{}] lines {}-{}{}",
result.name,
symbol_kind_label(&result.kind),
display_line_number(result.start_line),
display_line_number(result.end_line),
annotations
.get(*index)
.and_then(|annotation| annotation.as_deref())
.unwrap_or("")
));
}
if !result.snippet.trim().is_empty() {
for line in result.snippet.lines() {
section.push_str("\n ");
section.push_str(line);
}
}
}
section
})
.collect::<Vec<_>>()
.join("\n\n")
}
fn blast_radius_annotations(ctx: &AppContext, results: &[HybridResult]) -> Vec<Option<String>> {
let Some(store) = warm_callgraph_store(ctx) else {
return vec![None; results.len()];
};
results
.iter()
.map(|result| blast_radius_annotation_for_result(&store, result))
.collect()
}
fn warm_callgraph_store(
ctx: &AppContext,
) -> Option<std::sync::Arc<crate::callgraph_store::CallGraphStore>> {
if ctx.callgraph_store_rx().lock().is_some() {
return None;
}
ctx.revalidate_callgraph_store_generation();
ctx.callgraph_store()
.read()
.unwrap_or_else(std::sync::PoisonError::into_inner)
.as_ref()
.map(std::sync::Arc::clone)
}
fn blast_radius_annotation_for_result(
store: &crate::callgraph_store::CallGraphStore,
result: &HybridResult,
) -> Option<String> {
if result.source == "lexical" || matches!(result.kind, SymbolKind::FileSummary) {
return None;
}
if result.name.trim().is_empty() {
return None;
}
let callers = callers_result(store, &result.file, &result.name, 1, true).ok()?;
let mut caller_basenames = Vec::new();
let mut seen_files = HashSet::new();
for group in &callers.callers {
if seen_files.insert(group.file.clone()) {
caller_basenames.push(compact_caller_basename(&group.file));
}
}
let mut suffix = format!(" ↩{}", callers.total_callers);
if !caller_basenames.is_empty() {
let more = caller_basenames.len() > 2;
let names = caller_basenames
.iter()
.take(2)
.cloned()
.collect::<Vec<_>>()
.join(",");
suffix.push(' ');
suffix.push_str(&names);
if more {
suffix.push_str(",…");
}
}
Some(suffix)
}
fn compact_caller_basename(file: &str) -> String {
let basename = Path::new(file)
.file_name()
.and_then(|name| name.to_str())
.unwrap_or(file);
truncate_chars(basename, 18)
}
fn truncate_chars(value: &str, max_chars: usize) -> String {
let mut chars = value.chars();
let truncated = chars.by_ref().take(max_chars).collect::<String>();
if chars.next().is_some() {
format!("{truncated}…")
} else {
truncated
}
}
fn result_to_json(result: &HybridResult) -> serde_json::Value {
let is_file_level = matches!(result.kind, SymbolKind::FileSummary);
let (start_line, end_line) = if is_file_level {
(serde_json::Value::Null, serde_json::Value::Null)
} else {
(
serde_json::json!(display_line_number(result.start_line)),
serde_json::json!(display_line_number(result.end_line)),
)
};
serde_json::json!({
"file": result.file.display().to_string(),
"name": result.name,
"kind": result.kind,
"start_line": start_line,
"end_line": end_line,
"location": if result.source == "lexical" { "[lexical match]" } else if is_file_level { "[file summary]" } else { "line range" },
"score": result.score,
"source": result.source,
"semantic_score": result.semantic_score,
"lexical_score": result.lexical_score,
"hybrid_boosted": result.hybrid_boosted,
"snippet": result.snippet,
})
}
fn grep_match_to_json(grep_match: &GrepMatch, source: &'static str) -> serde_json::Value {
serde_json::json!({
"kind": "GrepLine",
"source": source,
"file": grep_match.file.display().to_string(),
"line": grep_match.line,
"column": grep_match.column,
"line_text": grep_match.line_text,
"match_text": grep_match.match_text,
})
}
fn display_line_number(line: u32) -> u32 {
line.saturating_add(1)
}
fn symbol_kind_label(kind: &SymbolKind) -> &'static str {
match kind {
SymbolKind::Function => "function",
SymbolKind::Class => "class",
SymbolKind::Method => "method",
SymbolKind::Struct => "struct",
SymbolKind::Interface => "interface",
SymbolKind::Enum => "enum",
SymbolKind::TypeAlias => "type_alias",
SymbolKind::Variable => "variable",
SymbolKind::Heading => "heading",
SymbolKind::FileSummary => "file-summary",
}
}
fn semantic_status_label(status: &SemanticIndexStatus) -> &'static str {
match status {
SemanticIndexStatus::Ready { .. } => "ready",
SemanticIndexStatus::Building { .. } => "building",
SemanticIndexStatus::Disabled => "disabled",
SemanticIndexStatus::Failed(_) => "unavailable",
}
}
fn interpreted_as_label(mode: SearchMode) -> &'static str {
match mode {
SearchMode::Regex => "regex",
SearchMode::Literal => "literal",
SearchMode::Semantic => "semantic",
SearchMode::Hybrid => "hybrid",
}
}
fn fallback_executed_label(mode: SearchMode, lexical_ran: bool) -> &'static str {
if lexical_ran {
"lexical"
} else {
interpreted_as_label(mode)
}
}
fn query_kind_label(kind: QueryKind) -> &'static str {
match kind {
QueryKind::Identifier => "Identifier",
QueryKind::Mixed => "Mixed",
QueryKind::ErrorCode => "ErrorCode",
QueryKind::Path => "Path",
QueryKind::Regex => "Regex",
QueryKind::NaturalLanguage => "NaturalLanguage",
}
}
fn strip_surrounding_quotes(query: String) -> String {
let trimmed = query.trim();
if trimmed.len() < 2 {
return query;
}
let first = trimmed.chars().next().unwrap();
let last = trimmed.chars().next_back().unwrap();
if (first == '"' || first == '\'') && first == last {
let mut chars = trimmed.chars();
chars.next();
chars.next_back();
return chars.as_str().to_string();
}
query
}
fn literal_tokens_all_short(query: &str) -> bool {
let tokens = query
.split(|ch: char| !ch.is_ascii_alphanumeric() && ch != '_')
.filter(|token| !token.is_empty())
.collect::<Vec<_>>();
!tokens.is_empty() && tokens.iter().all(|token| token.len() < 3)
}
fn extracted_tokens_all_short(query: &str, shape: &QueryShape) -> bool {
let tokens = query_shape::extract_tokens(query, shape);
!tokens.is_empty() && tokens.iter().all(|token| token.len() < 3)
}
pub fn humanize_degraded_reasons(reasons: &[String]) -> Vec<String> {
reasons.iter().map(|code| humanize_one(code)).collect()
}
fn humanize_one(code: &str) -> String {
if code == "home_root" {
return "Project root is set to your home directory; large file-system indexes are disabled to avoid scanning the whole home tree.".into();
}
if code == "watcher_unavailable" {
return "file watcher unavailable; continuing without live external-change invalidation"
.to_string();
}
format!("(Degraded: {})", code)
}
fn degraded_warning(ctx: &AppContext) -> String {
let mut text = "Lexical search ran in degraded full-file-scan mode.".to_string();
let reasons = ctx.degraded_reasons();
if !reasons.is_empty() {
text.push_str(" Reasons: ");
text.push_str(&humanize_degraded_reasons(&reasons).join("; "));
}
text
}
#[cfg(test)]
mod tests {
use super::*;
use crate::callgraph::walk_project_files;
use crate::callgraph_store::CallGraphStore;
use crate::config::{Config, SemanticBackend, SemanticBackendConfig};
use crate::context::{
callgraph_cold_build_spawn_count_for_test, reset_callgraph_cold_build_spawn_count_for_test,
AppContext,
};
use crate::parser::TreeSitterProvider;
use crate::semantic_index::SemanticIndex;
use std::io::{Read, Write};
use std::net::TcpListener;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::thread;
fn semantic_request(query: &str, top_k: usize) -> RawRequest {
serde_json::from_value(serde_json::json!({
"id": "semantic-search-test",
"command": "semantic_search",
"query": query,
"top_k": top_k,
}))
.expect("build semantic search request")
}
fn semantic_request_with_hint(query: &str, top_k: usize, hint: &str) -> RawRequest {
serde_json::from_value(serde_json::json!({
"id": "semantic-search-test",
"command": "semantic_search",
"query": query,
"top_k": top_k,
"hint": hint,
}))
.expect("build semantic search request")
}
fn response_value(response: Response) -> serde_json::Value {
serde_json::to_value(response).expect("serialize response")
}
fn test_context(project_root: &Path) -> AppContext {
AppContext::new(
Box::new(TreeSitterProvider::new()),
Config {
project_root: Some(project_root.to_path_buf()),
..Config::default()
},
)
}
fn install_warm_callgraph_store(ctx: &AppContext, project_root: &Path) {
let root = std::fs::canonicalize(project_root).expect("canonical project root");
let files = walk_project_files(&root).collect::<Vec<_>>();
let store = CallGraphStore::open(root.join(".callgraph-store-test"), root)
.expect("open callgraph store");
store.cold_build(&files).expect("build callgraph store");
*ctx.callgraph_store()
.write()
.unwrap_or_else(std::sync::PoisonError::into_inner) = Some(Arc::new(store));
}
fn start_mock_embedding_server() -> (String, thread::JoinHandle<()>) {
let listener = TcpListener::bind("127.0.0.1:0").expect("bind embedding server");
let addr = listener.local_addr().expect("embedding server addr");
let handle = thread::spawn(move || {
let (mut stream, _) = listener.accept().expect("accept embedding request");
let mut buf = Vec::new();
let mut chunk = [0u8; 4096];
let mut header_end = None;
let mut content_length = 0usize;
loop {
let n = stream.read(&mut chunk).expect("read embedding request");
if n == 0 {
break;
}
buf.extend_from_slice(&chunk[..n]);
if header_end.is_none() {
if let Some(pos) = buf.windows(4).position(|window| window == b"\r\n\r\n") {
header_end = Some(pos + 4);
for line in String::from_utf8_lossy(&buf[..pos + 4]).lines() {
if let Some(value) = line.strip_prefix("Content-Length:") {
content_length = value.trim().parse::<usize>().unwrap_or(0);
}
}
}
}
if let Some(end) = header_end {
if buf.len() >= end + content_length {
break;
}
}
}
let body = r#"{"data":[{"embedding":[0.1,0.2,0.3],"index":0}]}"#;
let response = format!(
"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
body.len(),
body
);
stream
.write_all(response.as_bytes())
.expect("write embedding response");
});
(format!("http://{}", addr), handle)
}
#[test]
fn embed_query_construction_error_leaves_slot_empty_for_retry() {
let project = tempfile::tempdir().expect("create project dir");
let ctx = AppContext::new(
Box::new(TreeSitterProvider::new()),
Config {
project_root: Some(project.path().to_path_buf()),
semantic: SemanticBackendConfig {
backend: SemanticBackend::OpenAiCompatible,
model: "test-embedding".to_string(),
base_url: None,
api_key_env: None,
timeout_ms: 5_000,
max_batch_size: 64,
max_files: 20_000,
},
..Config::default()
},
);
let err = embed_query("anything", &ctx).expect_err("construction should fail");
assert!(
err.contains("base_url is required"),
"expected missing base_url construction error, got: {err}"
);
assert!(
ctx.semantic_embedding_model().lock().is_none(),
"failed model construction must not poison the lazy slot"
);
let (base_url, handle) = start_mock_embedding_server();
ctx.update_config(|config| {
config.semantic.base_url = Some(base_url);
});
let vector = embed_query("anything", &ctx).expect("retry should construct and embed");
assert_eq!(vector, vec![0.1, 0.2, 0.3]);
assert!(
ctx.semantic_embedding_model().lock().is_some(),
"successful retry should install the constructed model"
);
handle.join().expect("embedding server thread");
}
#[test]
fn short_nl_concept_routes_to_hybrid_when_lexical_ready() {
let shape = query_shape::classify("parse imports");
assert_eq!(shape.kind, QueryKind::NaturalLanguage);
let mut warnings = Vec::new();
let mode = choose_mode(
SearchHint::Auto,
"parse imports",
&shape,
true,
&mut warnings,
);
assert_eq!(mode, SearchMode::Hybrid);
}
#[test]
fn long_nl_phrase_stays_semantic() {
let q = "how does the bridge resolve the binary";
let shape = query_shape::classify(q);
assert_eq!(shape.kind, QueryKind::NaturalLanguage);
let mut warnings = Vec::new();
let mode = choose_mode(SearchHint::Auto, q, &shape, true, &mut warnings);
assert_eq!(mode, SearchMode::Semantic);
}
#[test]
fn short_nl_extracts_lexical_tokens() {
let tokens = query_shape::extract_short_nl_lexical_tokens("parse imports");
assert_eq!(tokens, vec!["parse".to_string(), "imports".to_string()]);
let tokens2 = query_shape::extract_short_nl_lexical_tokens("go to");
assert!(tokens2.is_empty());
}
#[test]
fn building_status_returns_lexical_fallback_results() {
let project = tempfile::tempdir().expect("create project dir");
let source_file = project.path().join("src/lib.rs");
std::fs::create_dir_all(source_file.parent().expect("source parent"))
.expect("create source dir");
let source = "pub fn needle_symbol() -> bool { true }\n";
std::fs::write(&source_file, source).expect("write source file");
let ctx = test_context(project.path());
let mut index = SearchIndex::new();
index.index_file(&source_file, source.as_bytes());
index.ready = true;
*ctx.search_index()
.write()
.unwrap_or_else(std::sync::PoisonError::into_inner) = Some(index);
*ctx.semantic_index_status()
.write()
.unwrap_or_else(std::sync::PoisonError::into_inner) = SemanticIndexStatus::Building {
stage: "embedding".to_string(),
files: Some(1),
entries_done: Some(0),
entries_total: Some(1),
};
let response = response_value(handle_semantic_search(
&semantic_request("needle_symbol", 5),
&ctx,
));
assert_eq!(response["success"], true);
assert_eq!(response["status"], "building");
assert_eq!(response["semantic_status"], "building");
assert_eq!(response["interpreted_as"], "lexical");
assert!(response["note"]
.as_str()
.expect("note")
.contains("lexical-only fallback"));
assert!(response["text"]
.as_str()
.expect("text")
.contains("lexical fallback"));
let results = response["results"].as_array().expect("results array");
assert!(
results.iter().any(|result| {
result["source"] == "lexical"
&& result["file"]
.as_str()
.expect("file")
.ends_with("src/lib.rs")
}),
"expected lexical fallback result, got {results:?}"
);
}
#[test]
fn regex_query_runs_without_semantic_index() {
let project = tempfile::tempdir().expect("create project dir");
let source_file = project.path().join("src/lib.rs");
std::fs::create_dir_all(source_file.parent().expect("source parent"))
.expect("create source dir");
std::fs::write(&source_file, "pub fn exported() {}\n").expect("write source file");
let ctx = test_context(project.path());
*ctx.semantic_index_status()
.write()
.unwrap_or_else(std::sync::PoisonError::into_inner) = SemanticIndexStatus::Disabled;
let response = response_value(handle_semantic_search(
&semantic_request_with_hint(".*exported", 5, "regex"),
&ctx,
));
assert_eq!(response["success"], true);
assert_eq!(response["interpreted_as"], "regex");
assert_eq!(response["query_kind"], "Regex");
assert_eq!(response["semantic_status"], "disabled");
assert_eq!(response["results"][0]["kind"], "GrepLine");
}
#[test]
fn auto_regexlike_uncompilable_query_falls_back_to_literal() {
let project = tempfile::tempdir().expect("create project dir");
let source_file = project.path().join("src/lib.rs");
std::fs::create_dir_all(source_file.parent().expect("source parent"))
.expect("create source dir");
std::fs::write(
&source_file,
"// assert_ne!(.*route_channel\nassert_ne!(route_channel, 0);\n",
)
.expect("write source file");
let ctx = test_context(project.path());
let response = response_value(handle_semantic_search(
&semantic_request_with_hint("assert_ne!(.*route_channel", 5, "auto"),
&ctx,
));
assert_eq!(response["success"], true);
assert_eq!(response["interpreted_as"], "literal");
let results = response["results"].as_array().expect("results array");
assert!(
!results.is_empty(),
"expected literal fallback result, got {results:?}"
);
assert_eq!(response["results"][0]["source"], "literal");
assert_eq!(
response["results"][0]["match_text"],
"assert_ne!(.*route_channel"
);
let warnings = response["warnings"].as_array().expect("warnings array");
let fallback_warning = warnings
.iter()
.filter_map(|warning| warning.as_str())
.find(|warning| warning.contains("searched literally instead"))
.expect("fallback warning");
assert!(fallback_warning.contains("unclosed group"));
assert!(fallback_warning.contains("Pass hint:\"regex\" to force regex."));
}
#[test]
fn explicit_regex_uncompilable_query_still_errors() {
let project = tempfile::tempdir().expect("create project dir");
let ctx = test_context(project.path());
let response = response_value(handle_semantic_search(
&semantic_request_with_hint("assert_ne!(.*route_channel", 5, "regex"),
&ctx,
));
assert_eq!(response["success"], false);
assert_eq!(response["code"], "invalid_pattern");
}
#[test]
fn valid_auto_regex_query_stays_regex_without_fallback_warning() {
let project = tempfile::tempdir().expect("create project dir");
let source_file = project.path().join("src/lib.rs");
std::fs::create_dir_all(source_file.parent().expect("source parent"))
.expect("create source dir");
std::fs::write(&source_file, "let route_alpha_channel = 1;\n").expect("write source file");
let ctx = test_context(project.path());
let response = response_value(handle_semantic_search(
&semantic_request_with_hint("route_.*channel", 5, "auto"),
&ctx,
));
assert_eq!(response["success"], true);
assert_eq!(response["interpreted_as"], "regex");
assert_eq!(response["results"][0]["source"], "regex");
let warnings = response["warnings"].as_array().expect("warnings array");
assert!(!warnings.iter().any(|warning| {
warning
.as_str()
.expect("warning")
.contains("searched literally instead")
}));
}
#[test]
fn literal_hint_short_token_warns_and_runs_grep_line_results() {
let project = tempfile::tempdir().expect("create project dir");
let source_file = project.path().join("src/lib.rs");
std::fs::create_dir_all(source_file.parent().expect("source parent"))
.expect("create source dir");
std::fs::write(&source_file, "id = 1\n").expect("write source file");
let ctx = test_context(project.path());
let response = response_value(handle_semantic_search(
&semantic_request_with_hint("id", 5, "literal"),
&ctx,
));
assert_eq!(response["success"], true);
assert_eq!(response["interpreted_as"], "literal");
assert!(response["warnings"][0]
.as_str()
.expect("warning")
.contains("shorter than 3"));
}
#[test]
fn unsupported_regex_returns_specific_error() {
let project = tempfile::tempdir().expect("create project dir");
let ctx = test_context(project.path());
let response = response_value(handle_semantic_search(
&semantic_request_with_hint("(?=foo)", 5, "regex"),
&ctx,
));
assert_eq!(response["success"], false);
assert_eq!(response["code"], "unsupported_pattern");
assert!(response["message"]
.as_str()
.expect("message")
.contains("lookaround"));
}
#[test]
fn humanize_degraded_reason_messages() {
let reasons = vec![
"home_root".to_string(),
"watcher_unavailable".to_string(),
"custom".to_string(),
];
let human = humanize_degraded_reasons(&reasons);
assert!(human[0].contains("home directory"));
assert_eq!(
human[1],
"file watcher unavailable; continuing without live external-change invalidation"
);
assert_eq!(human[2], "(Degraded: custom)");
assert!(human.join("; ").contains("; "));
}
#[test]
fn semantic_candidate_limit_scales_with_small_top_k() {
assert_eq!(semantic_candidate_limit(1), SEMANTIC_OVERFETCH_FLOOR);
assert_eq!(semantic_candidate_limit(5), 15);
assert_eq!(semantic_candidate_limit(100), MAX_TOP_K);
}
fn rerank_shape(kind: QueryKind) -> QueryShape {
QueryShape {
kind,
weights: query_shape::ShapeWeights {
semantic: 0.0,
lexical: 0.0,
should_use_lexical: false,
},
}
}
fn semantic_candidate(
file: &str,
name: &str,
qualified_name: Option<&str>,
kind: SymbolKind,
score: f32,
) -> SemanticResult {
SemanticResult {
file: PathBuf::from(file),
name: name.to_string(),
qualified_name: qualified_name.map(str::to_string),
kind,
start_line: 0,
end_line: 0,
exported: false,
snippet: String::new(),
score,
rank_score: score,
cap_protected: false,
source: "semantic",
}
}
fn candidate_rank<'a>(results: &'a [SemanticResult], name: &str) -> &'a SemanticResult {
results
.iter()
.find(|result| result.name == name)
.expect("candidate present")
}
#[test]
fn type_concept_identifier_detector_fires_only_for_titlecase_concepts() {
for query in [
"Engine implementations",
"Engine handlers",
"Allocation strategies",
"EngineFactory implementations",
] {
let shape = query_shape::classify(query);
assert_eq!(shape.kind, QueryKind::Identifier, "{query}");
assert!(
query_shape::is_type_concept_identifier_query(query, &shape),
"{query} should get definition priors"
);
}
for query in [
"engineFactory",
"useState hook",
"parseConfig option",
"Engine",
] {
let shape = query_shape::classify(query);
assert_eq!(shape.kind, QueryKind::Identifier, "{query}");
assert!(
!query_shape::is_type_concept_identifier_query(query, &shape),
"{query} should keep Identifier priors inert"
);
}
for query in ["get user", "parse config"] {
let shape = query_shape::classify(query);
assert_eq!(shape.kind, QueryKind::NaturalLanguage, "{query}");
assert!(!query_shape::is_type_concept_identifier_query(
query, &shape
));
}
}
#[test]
fn type_concept_identifier_kind_prior_raises_definitions() {
let shape = rerank_shape(QueryKind::Identifier);
let candidates = vec![
semantic_candidate(
"/project/src/engine.ts",
"engineCache",
None,
SymbolKind::Variable,
0.80,
),
semantic_candidate(
"/project/src/engine.ts",
"engineState",
None,
SymbolKind::Variable,
0.79,
),
semantic_candidate(
"/project/src/renderer.ts",
"Renderer",
Some("Renderer"),
SymbolKind::Class,
0.75,
),
];
let mut type_concept_candidates = candidates.clone();
rerank_semantic_candidates(
&mut type_concept_candidates,
&shape,
"Engine implementations",
);
let type_concept_results = fuse_hybrid_results(
type_concept_candidates,
Vec::new(),
&shape,
10,
true,
Path::new("/project"),
);
assert_eq!(type_concept_results[0].name, "Renderer");
let mut plain_identifier_candidates = candidates;
rerank_semantic_candidates(&mut plain_identifier_candidates, &shape, "engineFactory");
assert!(plain_identifier_candidates
.iter()
.all(|result| (result.rank_score - result.score).abs() < f32::EPSILON));
let plain_identifier_results = fuse_hybrid_results(
plain_identifier_candidates,
Vec::new(),
&shape,
10,
true,
Path::new("/project"),
);
assert_eq!(plain_identifier_results[0].name, "engineCache");
assert_eq!(plain_identifier_results[1].name, "engineState");
assert_eq!(plain_identifier_results[2].name, "Renderer");
}
#[test]
fn type_concept_identifier_diversity_cap_limits_repeated_clusters_only() {
let shape = rerank_shape(QueryKind::Identifier);
let mut repeated_candidates = vec![
semantic_candidate(
"/project/src/a.ts",
"Engine",
Some("Engine"),
SymbolKind::Class,
0.90,
),
semantic_candidate(
"/project/src/b.ts",
"Engine",
Some("Engine"),
SymbolKind::Class,
0.89,
),
semantic_candidate(
"/project/src/c.ts",
"Engine",
Some("Engine"),
SymbolKind::Class,
0.88,
),
];
rerank_semantic_candidates(&mut repeated_candidates, &shape, "Engine implementations");
assert_eq!(repeated_candidates.len(), 2);
assert!(repeated_candidates
.iter()
.all(|result| result.name == "Engine"));
let mut distinct_candidates = vec![
semantic_candidate(
"/project/src/renderer.ts",
"Renderer",
Some("Renderer"),
SymbolKind::Class,
0.80,
),
semantic_candidate(
"/project/src/parser.ts",
"Parser",
Some("Parser"),
SymbolKind::Class,
0.79,
),
semantic_candidate(
"/project/src/planner.ts",
"Planner",
Some("Planner"),
SymbolKind::Class,
0.78,
),
];
rerank_semantic_candidates(&mut distinct_candidates, &shape, "Engine implementations");
assert_eq!(distinct_candidates.len(), 3);
assert!(distinct_candidates
.iter()
.all(|result| result.rank_score > result.score));
}
#[test]
fn type_concept_identifier_exact_name_boost_composes_with_kind_prior() {
let shape = rerank_shape(QueryKind::Identifier);
let mut candidates = vec![
semantic_candidate(
"/project/src/engine.ts",
"Engine",
Some("Engine"),
SymbolKind::Class,
0.70,
),
semantic_candidate(
"/project/src/renderer.ts",
"Renderer",
Some("Renderer"),
SymbolKind::Class,
0.75,
),
];
rerank_semantic_candidates(&mut candidates, &shape, "Engine implementations");
let named = candidate_rank(&candidates, "Engine");
let sibling = candidate_rank(&candidates, "Renderer");
assert!(named.rank_score > sibling.rank_score);
assert!((named.rank_score - (0.70 * 1.08 * 1.20)).abs() < 0.0001);
assert!((sibling.rank_score - (0.75 * 1.08)).abs() < 0.0001);
}
#[test]
fn natural_language_kind_prior_and_semantic_only_fuse_raise_definitions() {
let shape = rerank_shape(QueryKind::NaturalLanguage);
let mut candidates = vec![
semantic_candidate(
"/project/src/engine.ts",
"engineFactory",
None,
SymbolKind::Variable,
0.80,
),
semantic_candidate(
"/project/src/engine.ts",
"engineCache",
None,
SymbolKind::Variable,
0.79,
),
semantic_candidate(
"/project/src/engine.ts",
"engineState",
None,
SymbolKind::Variable,
0.78,
),
semantic_candidate(
"/project/src/engine.ts",
"Engine",
Some("Engine"),
SymbolKind::Class,
0.76,
),
];
rerank_semantic_candidates(&mut candidates, &shape, "Engine implementations");
let results = fuse_hybrid_results(
candidates,
Vec::new(),
&shape,
10,
true,
Path::new("/project"),
);
assert_eq!(results[0].name, "Engine");
assert_eq!(results[0].semantic_score, Some(0.76));
assert!(
results[0].score > 0.80,
"definition prior should drive semantic-only ordering"
);
}
#[test]
fn natural_language_diversity_cap_limits_repeated_name_kind_clusters() {
let nl_shape = rerank_shape(QueryKind::NaturalLanguage);
let mixed_shape = rerank_shape(QueryKind::Mixed);
let candidates = vec![
semantic_candidate(
"/project/src/a.ts",
"engineFactory",
None,
SymbolKind::Variable,
0.90,
),
semantic_candidate(
"/project/src/b.ts",
"engineFactory",
None,
SymbolKind::Variable,
0.89,
),
semantic_candidate(
"/project/src/c.ts",
"engineFactory",
None,
SymbolKind::Variable,
0.88,
),
];
let mut nl_candidates = candidates.clone();
rerank_semantic_candidates(
&mut nl_candidates,
&nl_shape,
"engine factory implementations",
);
assert_eq!(nl_candidates.len(), 2);
let mut mixed_candidates = candidates;
rerank_semantic_candidates(
&mut mixed_candidates,
&mixed_shape,
"engineFactory implementations",
);
assert_eq!(mixed_candidates.len(), 3);
assert!(mixed_candidates
.iter()
.all(|result| (result.rank_score - result.score).abs() < f32::EPSILON));
}
#[test]
fn exact_identifier_definition_boost_and_cap_protection_survive_file_cap() {
let shape = rerank_shape(QueryKind::Identifier);
let file = "/project/src/allocation.ts";
let mut candidates = vec![
semantic_candidate(file, "unrelatedField", None, SymbolKind::Variable, 0.99),
semantic_candidate(file, "unrelatedMethod", None, SymbolKind::Method, 0.98),
semantic_candidate(
file,
"AllocationService",
Some("AllocationService"),
SymbolKind::Class,
0.80,
),
semantic_candidate(file, "allocationService", None, SymbolKind::Variable, 0.79),
semantic_candidate(file, "allocate", None, SymbolKind::Method, 0.78),
];
rerank_semantic_candidates(&mut candidates, &shape, "AllocationService");
let class = candidate_rank(&candidates, "AllocationService");
assert!(class.rank_score > candidate_rank(&candidates, "allocationService").rank_score);
assert!(class.rank_score > candidate_rank(&candidates, "allocate").rank_score);
assert!(class.cap_protected);
let fused = fuse_hybrid_results(
candidates,
vec![(PathBuf::from(file), 1.0)],
&shape,
10,
true,
Path::new("/project"),
);
assert!(fused
.iter()
.any(|result| result.name == "AllocationService"));
assert_eq!(
fused
.iter()
.filter(|result| result.file.as_path() == Path::new(file))
.count(),
3,
"protected exact-name hit should be reserved in addition to two ordinary siblings"
);
}
#[test]
fn qualified_name_tokens_rank_nested_definitions_over_outer_types() {
let nl_shape = rerank_shape(QueryKind::NaturalLanguage);
let id_shape = rerank_shape(QueryKind::Identifier);
let candidates = vec![
semantic_candidate(
"/project/src/engine.ts",
"Engine",
Some("Engine"),
SymbolKind::Class,
0.80,
),
semantic_candidate(
"/project/src/engine.ts",
"Index",
Some("Engine.Index"),
SymbolKind::Class,
0.70,
),
];
let mut spaced = candidates.clone();
rerank_semantic_candidates(&mut spaced, &nl_shape, "Engine Index");
let spaced_results = fuse_hybrid_results(
spaced,
Vec::new(),
&nl_shape,
10,
true,
Path::new("/project"),
);
assert_eq!(spaced_results[0].name, "Index");
let mut dotted = candidates;
rerank_semantic_candidates(&mut dotted, &id_shape, "Engine.Index");
let dotted_results = fuse_hybrid_results(
dotted,
Vec::new(),
&id_shape,
10,
true,
Path::new("/project"),
);
assert_eq!(dotted_results[0].name, "Index");
}
#[test]
fn identifier_engine_factory_and_non_name_shapes_leave_priors_inert() {
let identifier_shape = rerank_shape(QueryKind::Identifier);
let mut identifier_candidates = vec![
semantic_candidate(
"/project/src/engine.ts",
"engineFactory",
None,
SymbolKind::Variable,
0.80,
),
semantic_candidate(
"/project/src/engine.ts",
"EngineFactory",
Some("EngineFactory"),
SymbolKind::Class,
0.79,
),
];
rerank_semantic_candidates(
&mut identifier_candidates,
&identifier_shape,
"engineFactory",
);
assert_eq!(
identifier_candidates[0].rank_score,
identifier_candidates[0].score
);
assert_eq!(
identifier_candidates[1].rank_score,
identifier_candidates[1].score
);
assert!(!identifier_candidates
.iter()
.any(|result| result.cap_protected));
let identifier_results = fuse_hybrid_results(
identifier_candidates,
Vec::new(),
&identifier_shape,
10,
true,
Path::new("/project"),
);
assert_eq!(identifier_results[0].name, "engineFactory");
for kind in [QueryKind::Path, QueryKind::Regex, QueryKind::ErrorCode] {
let shape = rerank_shape(kind);
let mut candidates = vec![semantic_candidate(
"/project/src/engine.ts",
"EngineFactory",
Some("EngineFactory"),
SymbolKind::Class,
0.79,
)];
rerank_semantic_candidates(&mut candidates, &shape, "EngineFactory");
assert_eq!(candidates[0].rank_score, candidates[0].score);
assert!(!candidates[0].cap_protected);
}
}
#[test]
fn exact_name_boost_does_not_cap_protect_near_zero_common_names() {
let shape = rerank_shape(QueryKind::Identifier);
let mut candidates = vec![semantic_candidate(
"/project/src/list.ts",
"List",
Some("List"),
SymbolKind::Class,
0.01,
)];
rerank_semantic_candidates(&mut candidates, &shape, "List");
assert!((candidates[0].rank_score - 0.012).abs() < 0.0001);
assert!(!candidates[0].cap_protected);
}
#[test]
fn boosted_rank_score_keeps_raw_cosine_honesty_checks() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("allocation.rs");
let body = (0..30)
.map(|index| format!("line{index}"))
.collect::<Vec<_>>()
.join("\n");
std::fs::write(&path, body).expect("write symbol body");
let shape = rerank_shape(QueryKind::Identifier);
let mut candidates = vec![SemanticResult {
file: path,
name: "AllocationService".to_string(),
qualified_name: Some("AllocationService".to_string()),
kind: SymbolKind::Class,
start_line: 0,
end_line: 29,
exported: false,
snippet: String::new(),
score: HIGH_CONFIDENCE_COSINE_FLOOR - 0.01,
rank_score: HIGH_CONFIDENCE_COSINE_FLOOR - 0.01,
cap_protected: false,
source: "semantic",
}];
rerank_semantic_candidates(&mut candidates, &shape, "AllocationService");
assert!(candidates[0].rank_score > HIGH_CONFIDENCE_COSINE_FLOOR);
let mut fused = fuse_hybrid_results(candidates, Vec::new(), &shape, 10, true, dir.path());
assert_eq!(
fused[0].semantic_score,
Some(HIGH_CONFIDENCE_COSINE_FLOOR - 0.01)
);
let incomplete = enrich_snippets_from_source(&mut fused, dir.path());
assert!(incomplete);
assert!(fused[0].snippet.contains("line19"));
assert!(!fused[0].snippet.contains("line29"));
assert!(!fused[0].snippet.contains(RANK0_FULL_SYMBOL_NOTICE));
let mut weak = vec![semantic_candidate(
"/project/src/weak.ts",
"WeakService",
Some("WeakService"),
SymbolKind::Class,
WEAK_MATCH_COSINE_FLOOR - 0.01,
)];
rerank_semantic_candidates(&mut weak, &shape, "WeakService");
let weak_results =
fuse_hybrid_results(weak, Vec::new(), &shape, 10, true, Path::new("/project"));
assert!(weak_results[0].score > WEAK_MATCH_COSINE_FLOOR);
let text = format_semantic_text(&weak_results, Path::new("/project"), false, false, None);
assert!(text.contains("Top match is weak"), "got: {text}");
}
#[test]
fn empty_semantic_index_skips_query_dimension_check() {
let project = tempfile::tempdir().expect("create project dir");
let (base_url, handle) = start_mock_embedding_server();
let ctx = AppContext::new(
Box::new(TreeSitterProvider::new()),
Config {
project_root: Some(project.path().to_path_buf()),
semantic: SemanticBackendConfig {
backend: SemanticBackend::OpenAiCompatible,
model: "test-embedding".to_string(),
base_url: Some(base_url),
api_key_env: None,
timeout_ms: 5_000,
max_batch_size: 64,
max_files: 20_000,
},
..Config::default()
},
);
*ctx.semantic_index_status()
.write()
.unwrap_or_else(std::sync::PoisonError::into_inner) = SemanticIndexStatus::ready();
*ctx.semantic_index()
.write()
.unwrap_or_else(std::sync::PoisonError::into_inner) =
Some(SemanticIndex::new(project.path().to_path_buf(), 384));
let response = response_value(handle_semantic_search(
&semantic_request("anything", 5),
&ctx,
));
assert_eq!(
response["success"], true,
"response should not fail: {response:?}"
);
assert_eq!(response["status"], "ready");
assert_eq!(response["semantic_status"], "ready");
assert!(response["results"].as_array().expect("results").is_empty());
handle.join().expect("embedding server thread");
}
#[test]
fn file_summary_text_uses_summary_location_instead_of_line_range() {
let project_root = Path::new("/project");
let results = vec![HybridResult {
file: PathBuf::from("/project/src/index.ts"),
name: "index".to_string(),
kind: SymbolKind::FileSummary,
start_line: 0,
end_line: 0,
exported: false,
snippet: String::new(),
score: 0.75,
source: "semantic",
semantic_score: Some(0.75),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
}];
let text = format_semantic_text(&results, project_root, false, false, None);
assert!(text.contains("index [file summary]"));
assert!(!text.contains("lines 1-1"));
assert!(!text.contains("score"));
assert!(!text.contains("source semantic"));
}
fn write_symbol_hit(
dir: &Path,
file_name: &str,
name: &str,
body_lines: usize,
) -> HybridResult {
let path = dir.join(file_name);
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent).expect("create symbol parent");
}
let body = (0..body_lines)
.map(|i| format!("line{i}"))
.collect::<Vec<_>>()
.join("\n");
std::fs::write(&path, &body).expect("write symbol file");
HybridResult {
file: path,
name: name.to_string(),
kind: SymbolKind::Function,
start_line: 0,
end_line: (body_lines.saturating_sub(1)) as u32,
exported: false,
snippet: String::new(),
score: 0.5,
source: "semantic",
semantic_score: Some(0.5),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
}
}
#[test]
fn rows_omit_score_and_source() {
let dir = tempfile::tempdir().expect("tempdir");
let mut results = vec![write_symbol_hit(dir.path(), "a.rs", "foo", 2)];
let incomplete = enrich_snippets_from_source(&mut results, dir.path());
let text = format_semantic_text(&results, dir.path(), false, incomplete, None);
assert!(text.contains("foo [function] lines 1-2"));
assert!(!text.contains("score"));
assert!(!text.contains("source"));
}
#[test]
fn snippets_are_rank_tiered_top_three_only_from_source() {
let dir = tempfile::tempdir().expect("tempdir");
let mut results: Vec<HybridResult> = (0..5)
.map(|i| write_symbol_hit(dir.path(), &format!("f{i}.rs"), &format!("fn{i}"), 30))
.collect();
let incomplete = enrich_snippets_from_source(&mut results, dir.path());
assert!(incomplete);
let text = format_semantic_text(&results, dir.path(), false, incomplete, None);
assert!(text.contains("fn0 [function]"));
assert!(text.contains("+10 more lines"));
assert!(text.contains("+25 more lines"));
let body_lines =
|r: &HybridResult| r.snippet.lines().filter(|l| l.starts_with("line")).count();
assert_eq!(body_lines(&results[0]), 20);
assert_eq!(body_lines(&results[1]), 5);
assert!(
results[3].snippet.is_empty(),
"rank 4+ must have no snippet"
);
assert!(
results[4].snippet.is_empty(),
"rank 4+ must have no snippet"
);
assert!(text.contains("aft_zoom <file> <symbol>"));
}
#[test]
fn high_confidence_rank0_expands_full_symbol_body() {
let dir = tempfile::tempdir().expect("tempdir");
let mut results = vec![write_symbol_hit(dir.path(), "full.rs", "full", 30)];
results[0].semantic_score = Some(HIGH_CONFIDENCE_COSINE_FLOOR);
results[0].score = HIGH_CONFIDENCE_COSINE_FLOOR;
let incomplete = enrich_snippets_from_source(&mut results, dir.path());
assert!(
!incomplete,
"full rank-0 symbol should not need a zoom hint"
);
assert!(results[0].snippet.contains("line29"));
assert!(results[0].snippet.contains(RANK0_FULL_SYMBOL_NOTICE));
assert!(!results[0].snippet.contains("+10 more lines"));
}
#[test]
fn subfloor_rank0_keeps_preview_budget() {
let dir = tempfile::tempdir().expect("tempdir");
let mut results = vec![write_symbol_hit(dir.path(), "preview.rs", "preview", 30)];
results[0].semantic_score = Some(HIGH_CONFIDENCE_COSINE_FLOOR - 0.01);
results[0].score = HIGH_CONFIDENCE_COSINE_FLOOR - 0.01;
let incomplete = enrich_snippets_from_source(&mut results, dir.path());
assert!(incomplete);
assert!(results[0].snippet.contains("line19"));
assert!(!results[0].snippet.contains("line29"));
assert!(results[0].snippet.contains("+10 more lines"));
}
#[test]
fn test_support_rank0_never_full_expands() {
let dir = tempfile::tempdir().expect("tempdir");
let mut results = vec![write_symbol_hit(
dir.path(),
"fixtures/full.rs",
"fixture",
30,
)];
results[0].semantic_score = Some(0.99);
results[0].score = 0.99;
let incomplete = enrich_snippets_from_source(&mut results, dir.path());
assert!(incomplete);
assert!(!results[0].snippet.contains("line29"));
assert!(results[0].snippet.contains("+10 more lines"));
}
#[test]
fn rank0_large_container_renders_member_menu_without_full_notice() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("large.ts");
let mut content = String::from(
"class BigContainer {\n methodOne(): number {\n const visibleMethodBodyLine = 1;\n",
);
for i in 0..155 {
content.push_str(&format!(" const filler{i} = {i};\n"));
}
content.push_str(
" return visibleMethodBodyLine;\n }\n\n methodTwo(): void {\n console.log(\"second\");\n }\n}\n",
);
std::fs::write(&path, content).expect("write large class");
let ctx = test_context(dir.path());
let symbols = ctx.provider().list_symbols(&path).expect("list symbols");
let target = symbols
.iter()
.find(|symbol| symbol.name == "BigContainer")
.expect("BigContainer symbol");
let mut results = vec![HybridResult {
file: path,
name: "BigContainer".to_string(),
kind: SymbolKind::Class,
start_line: target.range.start_line,
end_line: target.range.end_line,
exported: false,
snippet: String::new(),
score: 0.99,
source: "semantic",
semantic_score: Some(0.99),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
}];
let incomplete =
enrich_snippets_from_source_with_context(&mut results, dir.path(), Some(&ctx));
let snippet = &results[0].snippet;
assert!(incomplete, "member menu is not a complete body");
assert!(
snippet.contains("member-signature menu; zoom a member for its body"),
"large container should render a member menu: {snippet}"
);
assert!(
snippet.contains("BigContainer.methodOne(): number"),
"menu should include qualified method signatures: {snippet}"
);
assert!(
!snippet.contains("visibleMethodBodyLine"),
"menu must not include the class body: {snippet}"
);
assert!(
!snippet.contains(RANK0_FULL_SYMBOL_NOTICE),
"member menu must not claim the full symbol was shown: {snippet}"
);
}
#[test]
fn oversized_rank0_full_expansion_renders_budgeted_head_slice() {
let dir = tempfile::tempdir().expect("tempdir");
let mut results = vec![write_symbol_hit(dir.path(), "huge.rs", "huge", 300)];
results[0].semantic_score = Some(0.99);
results[0].score = 0.99;
let incomplete = enrich_snippets_from_source(&mut results, dir.path());
assert!(incomplete);
assert!(results[0].snippet.contains("line249"));
assert!(!results[0].snippet.contains("line299"));
assert!(results[0]
.snippet
.contains("… +50 more lines — zoom huge for the full body"));
assert!(
!results[0].snippet.contains(RANK0_FULL_SYMBOL_NOTICE),
"capped fallback is incomplete — must NOT claim no-re-read"
);
}
#[test]
fn rank0_expansion_includes_leading_doc_and_excludes_trailing_neighbor() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("doc.ts");
let content = "import x from 'y';\n\
\n\
/** Does the thing. */\n\
@decorator\n\
export function target() {\n\
\x20\x20return 1;\n\
}\n\
\n\
export function nextSymbol() {\n\
\x20\x20return 2;\n\
}\n";
std::fs::write(&path, content).expect("write");
let mut results = vec![HybridResult {
file: path,
name: "target".to_string(),
kind: SymbolKind::Function,
start_line: 4,
end_line: 6,
exported: true,
snippet: String::new(),
score: 0.99,
source: "semantic",
semantic_score: Some(0.99),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
}];
enrich_snippets_from_source(&mut results, dir.path());
let snippet = &results[0].snippet;
assert!(
snippet.contains("Does the thing."),
"leading doc comment must be included: {snippet}"
);
assert!(
snippet.contains("@decorator"),
"leading decorator must be included: {snippet}"
);
assert!(
snippet.contains("export function target()"),
"symbol signature must be present: {snippet}"
);
assert!(
!snippet.contains("nextSymbol"),
"trailing neighbor must NOT bleed in: {snippet}"
);
assert!(
snippet.contains(RANK0_FULL_SYMBOL_NOTICE),
"full expansion must carry the no-re-read notice: {snippet}"
);
}
#[test]
fn rank0_expansion_does_not_include_c_preprocessor_lines() {
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("target.c");
let content = "#include <x.h>
int target(void) {
return 0;
}
";
std::fs::write(&path, content).expect("write");
let mut results = vec![HybridResult {
file: path,
name: "target".to_string(),
kind: SymbolKind::Function,
start_line: 1,
end_line: 3,
exported: false,
snippet: String::new(),
score: HIGH_CONFIDENCE_COSINE_FLOOR,
source: "semantic",
semantic_score: Some(HIGH_CONFIDENCE_COSINE_FLOOR),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
}];
enrich_snippets_from_source(&mut results, dir.path());
let snippet = &results[0].snippet;
assert!(
snippet.contains("int target(void)"),
"symbol signature must be present: {snippet}"
);
assert!(
!snippet.contains("#include <x.h>"),
"C preprocessor directives must not be treated as symbol docs: {snippet}"
);
assert!(
snippet.contains(RANK0_FULL_SYMBOL_NOTICE),
"full expansion must carry the no-re-read notice: {snippet}"
);
}
#[test]
fn weak_top_match_emits_low_confidence_note() {
let dir = tempfile::tempdir().expect("tempdir");
let mut hit = write_symbol_hit(dir.path(), "a.rs", "foo", 2);
hit.semantic_score = Some(0.22);
hit.score = 0.22;
let results = vec![hit];
let text = format_semantic_text(&results, dir.path(), false, false, None);
assert!(
text.contains("Top match is weak"),
"expected weak-match note, got: {text}"
);
}
#[test]
fn strong_top_match_has_no_low_confidence_note() {
let dir = tempfile::tempdir().expect("tempdir");
let mut hit = write_symbol_hit(dir.path(), "a.rs", "foo", 2);
hit.semantic_score = Some(0.72);
hit.score = 0.72;
let results = vec![hit];
let text = format_semantic_text(&results, dir.path(), false, false, None);
assert!(!text.contains("Top match is weak"), "got: {text}");
assert!(!text.contains("[index: ready]"), "got: {text}");
}
#[test]
fn no_zoom_hint_when_all_snippets_fit() {
let dir = tempfile::tempdir().expect("tempdir");
let mut results = vec![
write_symbol_hit(dir.path(), "a.rs", "foo", 3),
write_symbol_hit(dir.path(), "b.rs", "bar", 3),
];
let incomplete = enrich_snippets_from_source(&mut results, dir.path());
assert!(!incomplete);
let text = format_semantic_text(&results, dir.path(), false, incomplete, None);
assert!(!text.contains("+"), "no truncation marker expected: {text}");
assert!(!text.contains("aft_zoom"), "no zoom hint expected: {text}");
}
#[test]
fn enrich_handles_missing_file_gracefully() {
let dir = tempfile::tempdir().expect("tempdir");
let mut results = vec![HybridResult {
file: dir.path().join("does-not-exist.rs"),
name: "ghost".to_string(),
kind: SymbolKind::Function,
start_line: 0,
end_line: 9,
exported: false,
snippet: String::new(),
score: 0.5,
source: "semantic",
semantic_score: Some(0.5),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
}];
let _ = enrich_snippets_from_source(&mut results, dir.path());
assert!(results[0].snippet.is_empty());
let text = format_result_sections(&results, dir.path());
assert!(text.contains("ghost [function]"));
}
#[test]
fn groups_render_in_rank_order_not_alphabetical() {
let dir = tempfile::tempdir().expect("tempdir");
let results = vec![
write_symbol_hit(dir.path(), "zzz.rs", "top", 1),
write_symbol_hit(dir.path(), "aaa.rs", "second", 1),
];
let text = format_result_sections(&results, dir.path());
let zzz_at = text.find("zzz.rs").expect("zzz present");
let aaa_at = text.find("aaa.rs").expect("aaa present");
assert!(zzz_at < aaa_at, "top-ranked file must render first: {text}");
}
#[test]
fn warm_callgraph_adds_compact_blast_radius_suffixes() {
let dir = tempfile::tempdir().expect("tempdir");
let src_dir = dir.path().join("src");
let fixture_dir = dir.path().join("fixtures");
std::fs::create_dir_all(&src_dir).expect("create src");
std::fs::create_dir_all(&fixture_dir).expect("create fixtures");
let target_file = src_dir.join("target.ts");
std::fs::write(
&target_file,
"export function covered() {\n return 1;\n}\nexport function untested() {\n return 2;\n}\n",
)
.expect("write target");
std::fs::write(
src_dir.join("app.ts"),
"import { covered, untested } from './target';\nexport function callerOne() {\n return covered() + untested();\n}\nexport function callerTwo() {\n return covered();\n}\n",
)
.expect("write app");
std::fs::write(
fixture_dir.join("covered_fixture.ts"),
"import { covered } from '../src/target';\nexport function fixtureCaller() {\n return covered();\n}\n",
)
.expect("write fixture");
let ctx = test_context(dir.path());
install_warm_callgraph_store(&ctx, dir.path());
let results = vec![
HybridResult {
file: target_file.clone(),
name: "covered".to_string(),
kind: SymbolKind::Function,
start_line: 0,
end_line: 2,
exported: true,
snippet: String::new(),
score: 0.8,
source: "semantic",
semantic_score: Some(0.8),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
},
HybridResult {
file: target_file,
name: "untested".to_string(),
kind: SymbolKind::Function,
start_line: 3,
end_line: 5,
exported: true,
snippet: String::new(),
score: 0.7,
source: "semantic",
semantic_score: Some(0.7),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
},
];
let text = format_semantic_text(&results, dir.path(), false, false, Some(&ctx));
let covered_line = text
.lines()
.find(|line| line.contains("covered [function]"))
.expect("covered row");
assert!(
covered_line.contains("↩"),
"covered row should show callers: {text}"
);
assert!(
covered_line.contains("app.ts") || covered_line.contains("covered_fixture.ts"),
"covered row should include caller basenames: {covered_line}"
);
let untested_line = text
.lines()
.find(|line| line.contains("untested [function]"))
.expect("untested row");
assert!(
untested_line.contains("↩"),
"untested row should show callers: {text}"
);
assert!(
untested_line.contains("app.ts"),
"untested caller basename missing: {untested_line}"
);
assert!(
!text.contains("⚠untested"),
"untested marker must no longer appear in search output: {text}"
);
}
#[test]
fn absent_warm_callgraph_emits_no_blast_radius_and_starts_no_build() {
let dir = tempfile::tempdir().expect("tempdir");
let ctx = test_context(dir.path());
reset_callgraph_cold_build_spawn_count_for_test();
let results = vec![write_symbol_hit(dir.path(), "target.rs", "target", 1)];
let text = format_semantic_text(&results, dir.path(), false, false, Some(&ctx));
assert!(
!text.contains("↩"),
"cold store should not annotate rows: {text}"
);
assert_eq!(callgraph_cold_build_spawn_count_for_test(), 0);
assert!(ctx.callgraph_store_rx().lock().is_none());
}
#[test]
fn more_available_appends_raise_topk_note() {
let dir = tempfile::tempdir().expect("tempdir");
let results = vec![write_symbol_hit(dir.path(), "a.rs", "foo", 1)];
let text = format_semantic_text(&results, dir.path(), true, false, None);
assert!(text.contains("More results available; raise topK to see more."));
}
#[test]
fn file_summary_json_uses_summary_location_instead_of_line_numbers() {
let result = HybridResult {
file: PathBuf::from("/project/src/index.ts"),
name: "index".to_string(),
kind: SymbolKind::FileSummary,
start_line: 0,
end_line: 0,
exported: false,
snippet: String::new(),
score: 0.75,
source: "semantic",
semantic_score: Some(0.75),
lexical_score: None,
hybrid_boosted: false,
cap_protected: false,
lexical_generated_artifact: false,
};
let json = result_to_json(&result);
assert_eq!(json["kind"], "file_summary");
assert_eq!(json["location"], "[file summary]");
assert!(json["start_line"].is_null());
assert!(json["end_line"].is_null());
assert_eq!(json["source"], "semantic");
assert_eq!(json["semantic_score"], 0.75);
assert!(json["lexical_score"].is_null());
}
}