use std::collections::HashMap;
use anyhow::{bail, Context, Result};
use cqs::parser::ChunkType;
use cqs::store::{ParentContext, UnifiedResult};
use cqs::{reference, Embedder, Embedding, Pattern, SearchFilter, Store};
use crate::cli::{display, signal, staleness, Cli};
/// Fixed per-result serialization overhead charged against the token
/// budget when output is JSON; plain-text output adds none.
fn json_overhead_for(cli: &Cli) -> usize {
    if !cli.json {
        return 0;
    }
    crate::cli::commands::JSON_OVERHEAD_PER_RESULT
}
/// Print a "no results" message (JSON object or plain text, optionally
/// naming the searched reference) and terminate the process with the
/// NoResults exit code. Never returns.
fn emit_empty_results(query: &str, json: bool, context: Option<&str>) -> ! {
    if json {
        println!(
            "{}",
            serde_json::json!({"results": [], "query": query, "total": 0})
        );
    } else {
        match context {
            Some(ctx) => println!("No results found in reference '{}'.", ctx),
            None => println!("No results found."),
        }
    }
    std::process::exit(signal::ExitCode::NoResults as i32);
}
/// Entry point for the `query` command.
///
/// Dispatches the free-text `query` to one of several search paths:
/// 1. `--name-only`: name-based search, project-wide or `--ref`-scoped.
/// 2. No explicit search flags: the adaptive router classifies the query;
///    a NameOnly pick is tried first and falls back to dense search when
///    it yields nothing.
/// 3. Otherwise: embedding search, optionally SPLADE-augmented, re-ranked,
///    or restricted to a single reference index via `--ref`.
pub(crate) fn cmd_query(ctx: &crate::cli::CommandContext, query: &str) -> Result<()> {
    // Cap the query text recorded on the tracing span at 200 bytes,
    // walking back to a char boundary so a UTF-8 sequence is never split.
    let query_preview = if query.len() > 200 {
        let mut end = 200;
        while end > 0 && !query.is_char_boundary(end) {
            end -= 1;
        }
        &query[..end]
    } else {
        query
    };
    let _span =
        tracing::info_span!("cmd_query", query_len = query.len(), query = %query_preview).entered();
    let cli = ctx.cli;
    let store = &ctx.store;
    let root = &ctx.root;
    let cqs_dir = &ctx.cqs_dir;
    // Name-only search bypasses embeddings entirely, so re-ranking (which
    // operates on embedding candidates) cannot be combined with it.
    if cli.name_only {
        if cli.rerank {
            bail!("--rerank requires embedding search, incompatible with --name-only");
        }
        if let Some(ref ref_name) = cli.ref_name {
            return cmd_query_ref_name_only(cli, ref_name, query, root);
        }
        return cmd_query_name_only(cli, store, query, root);
    }
    // Adaptive routing only runs when the user has not forced a strategy
    // with explicit flags.
    let has_explicit_flags = cli.splade || cli.rrf || cli.rerank || cli.ref_name.is_some();
    let classification = if !has_explicit_flags {
        let c = cqs::search::router::classify_query(query);
        tracing::info!(
            category = %c.category,
            confidence = %c.confidence,
            strategy = %c.strategy,
            "Query classified"
        );
        Some(c)
    } else {
        tracing::debug!("Explicit flags set, skipping adaptive routing");
        None
    };
    // If the router chose NameOnly, try it first; zero hits fall through to
    // the dense path below. Both outcomes are recorded for telemetry.
    if let Some(ref c) = classification {
        if c.strategy == cqs::search::router::SearchStrategy::NameOnly {
            let results = store.search_by_name(query, cli.limit)?;
            if !results.is_empty() {
                tracing::info!(results = results.len(), "NameOnly search succeeded");
                crate::cli::telemetry::log_routed(
                    cqs_dir,
                    query,
                    &c.category.to_string(),
                    &c.confidence.to_string(),
                    &c.strategy.to_string(),
                    false,
                    Some(results.len()),
                );
                // NOTE(review): search_by_name runs again inside
                // cmd_query_name_only, duplicating the lookup above —
                // presumably cheap enough to be acceptable; confirm.
                return cmd_query_name_only(cli, store, query, root);
            }
            tracing::info!("NameOnly returned 0 results, falling back to dense");
            crate::cli::telemetry::log_routed(
                cqs_dir,
                query,
                &c.category.to_string(),
                &c.confidence.to_string(),
                &c.strategy.to_string(),
                true,
                None,
            );
        }
    }
    // Over-fetch candidates (4x, capped at 100) when re-ranking so the
    // reranker has a wider pool before truncating back to cli.limit.
    let effective_limit = if cli.rerank {
        (cli.limit * 4).min(100)
    } else {
        cli.limit
    };
    let embedder = ctx.embedder()?;
    let query_embedding = embedder.embed_query(query)?;
    // Parse --lang, surfacing the valid language names on error.
    let languages = match &cli.lang {
        Some(l) => Some(vec![l.parse().context(format!(
            "Invalid language. Valid: {}",
            cqs::parser::Language::valid_names_display()
        ))?]),
        None => None,
    };
    // Explicit --include-type wins; otherwise default to code-only chunk
    // types unless --include-docs asks for everything.
    let include_types = match &cli.include_type {
        Some(types) => {
            let parsed: Result<Vec<ChunkType>, _> = types.iter().map(|t| t.parse()).collect();
            Some(parsed.with_context(|| {
                format!(
                    "Invalid chunk type. Valid: {}",
                    ChunkType::valid_names().join(", ")
                )
            })?)
        }
        None if cli.include_docs => None,
        None => {
            Some(ChunkType::code_types())
        }
    };
    let exclude_types = match &cli.exclude_type {
        Some(types) => {
            let parsed: Result<Vec<ChunkType>, _> = types.iter().map(|t| t.parse()).collect();
            Some(parsed.with_context(|| {
                format!(
                    "Invalid chunk type for --exclude-type. Valid: {}",
                    ChunkType::valid_names().join(", ")
                )
            })?)
        }
        None => None,
    };
    // Chunk types hinted by the router receive a score boost in the filter.
    let type_boost_types = classification.as_ref().and_then(|c| c.type_hints.clone());
    #[allow(clippy::needless_update)]
    let filter = SearchFilter {
        languages,
        include_types,
        exclude_types,
        path_pattern: cli.path.clone(),
        name_boost: cli.name_boost,
        query_text: query.to_string(),
        enable_rrf: cli.rrf,
        enable_demotion: !cli.no_demote,
        enable_splade: cli.splade,
        splade_alpha: cli.splade_alpha,
        type_boost_types,
        ..Default::default()
    };
    filter.validate().map_err(|e| anyhow::anyhow!(e))?;
    let reranker = if cli.rerank {
        Some(ctx.reranker()?)
    } else {
        None
    };
    // --ref restricts the whole search to a single reference index.
    if let Some(ref ref_name) = cli.ref_name {
        return cmd_query_ref_only(
            &RefQueryContext {
                cli,
                query,
                query_embedding: &query_embedding,
                filter: &filter,
                root,
                embedder,
                reranker,
            },
            ref_name,
        );
    }
    // SPLADE query encoding is best-effort: on failure we log a warning and
    // fall back to cosine-only search rather than aborting.
    let splade_query = if cli.splade {
        ctx.splade_encoder().and_then(|enc| {
            match enc.encode(query) {
                Ok(sv) => Some(sv),
                Err(e) => {
                    tracing::warn!(error = %e, "SPLADE query encoding failed, falling back to cosine-only");
                    None
                }
            }
        })
    } else {
        None
    };
    let splade_index = if cli.splade { ctx.splade_index() } else { None };
    cmd_query_project(&QueryContext {
        cli,
        query,
        query_embedding: &query_embedding,
        filter: &filter,
        store,
        cqs_dir,
        root,
        embedder,
        effective_limit,
        reranker,
        splade_query,
        splade_index,
    })
}
/// Bundled arguments for [`cmd_query_project`]: CLI flags plus the
/// pre-computed query data and search resources.
struct QueryContext<'a> {
    cli: &'a Cli,
    query: &'a str,
    // Dense embedding of `query`.
    query_embedding: &'a Embedding,
    filter: &'a SearchFilter,
    store: &'a Store,
    // Data directory holding the vector index, audit state, and telemetry.
    cqs_dir: &'a std::path::Path,
    // Project root, used for path display, staleness checks, and config.
    root: &'a std::path::Path,
    embedder: &'a Embedder,
    // Candidate count to fetch; exceeds `cli.limit` when re-ranking.
    effective_limit: usize,
    reranker: Option<&'a cqs::Reranker>,
    // Sparse SPLADE query vector; present only with --splade and a
    // successful encoding.
    splade_query: Option<cqs::splade::SparseVector>,
    splade_index: Option<&'a cqs::splade::index::SpladeIndex>,
}
/// Search the project index with the prepared embedding and filter, then
/// apply `--pattern` filtering, optional re-ranking, token-budget packing,
/// parent expansion, staleness warnings, and (with `--include-refs`)
/// merge in results from configured reference indexes before display.
fn cmd_query_project(ctx: &QueryContext<'_>) -> Result<()> {
    let cli = ctx.cli;
    let query = ctx.query;
    let query_embedding = ctx.query_embedding;
    let filter = ctx.filter;
    let store = ctx.store;
    let cqs_dir = ctx.cqs_dir;
    let root = ctx.root;
    let embedder = ctx.embedder;
    let effective_limit = ctx.effective_limit;
    let index = crate::cli::build_vector_index(store, cqs_dir)?;
    let audit_mode = cqs::audit::load_audit_state(cqs_dir);
    // Over-fetch 3x when a --pattern filter will discard matches afterward.
    let search_limit = if cli.pattern.is_some() {
        effective_limit * 3
    } else {
        effective_limit
    };
    // Hybrid (dense + sparse) search needs both the encoded query and the
    // SPLADE index; pair them or fall back to dense-only.
    let splade_arg = ctx
        .splade_query
        .as_ref()
        .and_then(|sq| ctx.splade_index.map(|si| (si, sq)));
    let results = if audit_mode.is_active() {
        // Audit mode always takes the hybrid code-search path.
        let code_results = store.search_hybrid(
            query_embedding,
            filter,
            search_limit,
            cli.threshold,
            index.as_deref(),
            splade_arg,
        )?;
        code_results.into_iter().map(UnifiedResult::Code).collect()
    } else {
        if splade_arg.is_some() {
            let code_results = store.search_hybrid(
                query_embedding,
                filter,
                search_limit,
                cli.threshold,
                index.as_deref(),
                splade_arg,
            )?;
            code_results.into_iter().map(UnifiedResult::Code).collect()
        } else {
            store.search_unified_with_index(
                query_embedding,
                filter,
                search_limit,
                cli.threshold,
                index.as_deref(),
            )?
        }
    };
    let pattern: Option<Pattern> = cli
        .pattern
        .as_ref()
        .map(|p| p.parse())
        .transpose()
        .context("Invalid pattern")?;
    // Apply the structural --pattern filter, then truncate back down to the
    // user-visible limit (the search above over-fetched to compensate).
    let results = if let Some(ref pat) = pattern {
        let mut filtered: Vec<UnifiedResult> = results
            .into_iter()
            .filter(|r| match r {
                UnifiedResult::Code(sr) => {
                    pat.matches(&sr.chunk.content, &sr.chunk.name, Some(sr.chunk.language))
                }
            })
            .collect();
        filtered.truncate(cli.limit);
        filtered
    } else {
        results
    };
    let results = if let Some(reranker) = ctx.reranker {
        rerank_unified(reranker, query, results, cli.limit)?
    } else {
        results
    };
    let json_overhead = json_overhead_for(cli);
    // --tokens: pack results into the token budget (see token_pack_results).
    let (results, token_info) = if let Some(budget) = cli.tokens {
        token_pack_results(
            results,
            budget,
            json_overhead,
            embedder,
            unified_text,
            unified_score,
            "query",
        )
    } else {
        (results, None)
    };
    // Parent context is only resolved when --expand was requested.
    let parents = if cli.expand {
        resolve_parent_context(&results, store, root)
    } else {
        HashMap::new()
    };
    let parents_ref = if cli.expand { Some(&parents) } else { None };
    // Warn when any result's originating file looks stale, unless silenced.
    if !cli.quiet && !cli.no_stale_check {
        let origins: Vec<&str> = results
            .iter()
            .map(|r| {
                let UnifiedResult::Code(sr) = r;
                sr.chunk.file.to_str().unwrap_or("")
            })
            .collect::<std::collections::HashSet<_>>()
            .into_iter()
            .collect();
        if !origins.is_empty() {
            staleness::warn_stale_results(store, &origins, root);
        }
    }
    let references = if cli.include_refs {
        let config = cqs::config::Config::load(root);
        reference::load_references(&config.references)
    } else {
        Vec::new()
    };
    // Project-only path: nothing to merge, display and return.
    if references.is_empty() {
        if results.is_empty() {
            emit_empty_results(query, cli.json, None);
        }
        if cli.json {
            display::display_unified_results_json(&results, query, parents_ref, token_info)?;
        } else {
            display::display_unified_results(
                &results,
                root,
                cli.no_content,
                cli.context,
                parents_ref,
            )?;
        }
        return Ok(());
    }
    // Multi-index path: project results were already re-ranked above, but
    // reference results are not, so warn rather than half-apply --rerank.
    if cli.rerank {
        tracing::warn!("--rerank is not supported with multi-index search, skipping re-ranking");
    }
    use rayon::prelude::*;
    // Search every reference index in parallel; failures are logged and
    // skipped, and empty result sets are dropped.
    let ref_results: Vec<_> = references
        .par_iter()
        .filter_map(|ref_idx| {
            match reference::search_reference(
                ref_idx,
                query_embedding,
                filter,
                cli.limit,
                cli.threshold,
                true,
            ) {
                Ok(r) if !r.is_empty() => Some((ref_idx.name.clone(), r)),
                Err(e) => {
                    tracing::warn!(reference = %ref_idx.name, error = %e, "Reference search failed");
                    None
                }
                _ => None,
            }
        })
        .collect();
    let tagged = reference::merge_results(results, ref_results, cli.limit);
    // Re-pack the merged set: merging added reference results that also
    // count against the token budget.
    let (tagged, token_info) = if let Some(budget) = cli.tokens {
        token_pack_results(
            tagged,
            budget,
            json_overhead,
            embedder,
            |r| unified_text(&r.result),
            |r| unified_score(&r.result),
            "tagged",
        )
    } else {
        (tagged, token_info)
    };
    if tagged.is_empty() {
        emit_empty_results(query, cli.json, None);
    }
    if cli.json {
        display::display_tagged_results_json(&tagged, query, parents_ref, token_info)?;
    } else {
        display::display_tagged_results(&tagged, root, cli.no_content, cli.context, parents_ref)?;
    }
    Ok(())
}
use crate::cli::commands::token_pack_results;
/// Borrow the chunk content backing a unified result (used for token
/// counting during budget packing).
fn unified_text(r: &UnifiedResult) -> &str {
    let UnifiedResult::Code(sr) = r;
    &sr.chunk.content
}
/// Extract the relevance score from a unified result.
fn unified_score(r: &UnifiedResult) -> f32 {
    let UnifiedResult::Code(sr) = r;
    sr.score
}
/// Re-rank unified results with the cross-encoder, truncating to `limit`.
///
/// With fewer than two results there is nothing to reorder, so the model
/// call is skipped entirely.
fn rerank_unified(
    reranker: &cqs::Reranker,
    query: &str,
    results: Vec<UnifiedResult>,
    limit: usize,
) -> Result<Vec<UnifiedResult>> {
    // Unwrap the enum layer: the reranker works on raw code search results.
    let mut code_results: Vec<cqs::store::SearchResult> = Vec::with_capacity(results.len());
    for result in results {
        let UnifiedResult::Code(sr) = result;
        code_results.push(sr);
    }
    if code_results.len() > 1 {
        reranker
            .rerank(query, &mut code_results, limit)
            .map_err(|e| anyhow::anyhow!("Reranking failed: {e}"))?;
    }
    // Re-wrap into the unified representation for downstream display.
    Ok(code_results.into_iter().map(UnifiedResult::Code).collect())
}
/// Run a pure name-based search against the project store and display the
/// results, honoring `--tokens`, `--expand`, and `--json`.
fn cmd_query_name_only(
    cli: &Cli,
    store: &Store,
    query: &str,
    root: &std::path::Path,
) -> Result<()> {
    let _span = tracing::info_span!("cmd_query_name_only", query).entered();
    let raw = store
        .search_by_name(query, cli.limit)
        .context("Failed to search by name")?;
    if raw.is_empty() {
        emit_empty_results(query, cli.json, None);
    }
    let mut unified: Vec<UnifiedResult> = raw.into_iter().map(UnifiedResult::Code).collect();
    let mut token_info = None;
    if let Some(budget) = cli.tokens {
        // Token-budget packing needs an embedder for token counting; build
        // one on demand since this path otherwise never loads a model.
        let embedder = Embedder::new(cli.try_model_config()?.clone())?;
        let packed = token_pack_results(
            unified,
            budget,
            json_overhead_for(cli),
            &embedder,
            unified_text,
            unified_score,
            "name-only",
        );
        unified = packed.0;
        token_info = packed.1;
    }
    // Parent context is only resolved when --expand was requested.
    let parents;
    let parents_ref = if cli.expand {
        parents = resolve_parent_context(&unified, store, root);
        Some(&parents)
    } else {
        None
    };
    if cli.json {
        display::display_unified_results_json(&unified, query, parents_ref, token_info)?;
    } else {
        display::display_unified_results(&unified, root, cli.no_content, cli.context, parents_ref)?;
    }
    Ok(())
}
/// Bundled arguments for [`cmd_query_ref_only`]: CLI flags plus the
/// pre-computed query embedding, filter, and optional reranker.
struct RefQueryContext<'a> {
    cli: &'a Cli,
    query: &'a str,
    // Dense embedding of `query`.
    query_embedding: &'a Embedding,
    filter: &'a SearchFilter,
    // Project root, used to resolve the named reference and display paths.
    root: &'a std::path::Path,
    // Used for token counting when --tokens is set.
    embedder: &'a Embedder,
    reranker: Option<&'a cqs::Reranker>,
}
/// Embedding search scoped to a single reference index (`--ref`).
///
/// Over-fetches when re-ranking, tags results with the reference name,
/// applies the optional token budget, and displays. No parent expansion
/// is performed on this path.
fn cmd_query_ref_only(ctx: &RefQueryContext<'_>, ref_name: &str) -> Result<()> {
    let _span = tracing::info_span!("cmd_query_ref_only", ref_name).entered();
    let ref_idx = crate::cli::commands::resolve::find_reference(ctx.root, ref_name)?;
    // Same 4x/100 over-fetch policy as the project path when re-ranking.
    let ref_limit = if ctx.cli.rerank {
        (ctx.cli.limit * 4).min(100)
    } else {
        ctx.cli.limit
    };
    let mut results = reference::search_reference(
        &ref_idx,
        ctx.query_embedding,
        ctx.filter,
        ref_limit,
        ctx.cli.threshold,
        false,
    )?;
    if let Some(reranker) = ctx.reranker {
        // Reordering a single result is a no-op; skip the model call.
        if results.len() > 1 {
            reranker
                .rerank(ctx.query, &mut results, ctx.cli.limit)
                .map_err(|e| anyhow::anyhow!("Reranking failed: {e}"))?;
        }
    }
    // Tag every hit with the reference it came from for display labeling.
    let tagged: Vec<reference::TaggedResult> = results
        .into_iter()
        .map(|r| reference::TaggedResult {
            result: UnifiedResult::Code(r),
            source: Some(ref_name.to_string()),
        })
        .collect();
    let json_overhead = json_overhead_for(ctx.cli);
    let (tagged, token_info) = if let Some(budget) = ctx.cli.tokens {
        token_pack_results(
            tagged,
            budget,
            json_overhead,
            ctx.embedder,
            |r| unified_text(&r.result),
            |r| unified_score(&r.result),
            "ref-only",
        )
    } else {
        (tagged, None)
    };
    if tagged.is_empty() {
        emit_empty_results(ctx.query, ctx.cli.json, Some(ref_name));
    }
    if ctx.cli.json {
        display::display_tagged_results_json(&tagged, ctx.query, None, token_info)?;
    } else {
        display::display_tagged_results(
            &tagged,
            ctx.root,
            ctx.cli.no_content,
            ctx.cli.context,
            None,
        )?;
    }
    Ok(())
}
/// Name-only search scoped to a single reference index
/// (`--ref` combined with `--name-only`).
fn cmd_query_ref_name_only(
    cli: &Cli,
    ref_name: &str,
    query: &str,
    root: &std::path::Path,
) -> Result<()> {
    let _span = tracing::info_span!("cmd_query_ref_name_only", ref_name).entered();
    let ref_idx = crate::cli::commands::resolve::find_reference(root, ref_name)?;
    let hits =
        reference::search_reference_by_name(&ref_idx, query, cli.limit, cli.threshold, false)?;
    // Tag every hit with the reference it came from for display labeling.
    let mut tagged = Vec::with_capacity(hits.len());
    for hit in hits {
        tagged.push(reference::TaggedResult {
            result: UnifiedResult::Code(hit),
            source: Some(ref_name.to_string()),
        });
    }
    let mut token_info = None;
    if let Some(budget) = cli.tokens {
        // The embedder is only needed for token counting under --tokens.
        let embedder = Embedder::new(cli.try_model_config()?.clone())?;
        // NOTE(review): the "tagged" label differs from "ref-only" used by
        // the embedding-based reference path — confirm intentional.
        let packed = token_pack_results(
            tagged,
            budget,
            json_overhead_for(cli),
            &embedder,
            |r| unified_text(&r.result),
            |r| unified_score(&r.result),
            "tagged",
        );
        tagged = packed.0;
        token_info = packed.1;
    }
    if tagged.is_empty() {
        emit_empty_results(query, cli.json, Some(ref_name));
    }
    if cli.json {
        display::display_tagged_results_json(&tagged, query, None, token_info)?;
    } else {
        display::display_tagged_results(&tagged, root, cli.no_content, cli.context, None)?;
    }
    Ok(())
}
/// Build a map from result chunk id to parent context for `--expand`.
///
/// Parent chunks are fetched from the store in a single batch; a chunk
/// whose parent is not stored falls back to reading its span from the
/// source file on disk. All failures are logged and skipped — expansion
/// is best-effort and never aborts the query.
fn resolve_parent_context(
    results: &[UnifiedResult],
    store: &Store,
    root: &std::path::Path,
) -> HashMap<String, ParentContext> {
    let mut parents = HashMap::new();
    // Deduplicate parent ids via a HashSet before the batch fetch.
    let parent_ids: Vec<String> = results
        .iter()
        .filter_map(|r| match r {
            UnifiedResult::Code(sr) => sr.chunk.parent_id.clone(),
        })
        .collect::<std::collections::HashSet<_>>()
        .into_iter()
        .collect();
    if parent_ids.is_empty() {
        return parents;
    }
    let id_refs: Vec<&str> = parent_ids.iter().map(|s| s.as_str()).collect();
    let stored_parents = match store.get_chunks_by_ids(&id_refs) {
        Ok(p) => p,
        Err(e) => {
            // Degrade to the file-reading fallback for every chunk.
            tracing::warn!(error = %e, "Failed to fetch parent chunks");
            HashMap::new()
        }
    };
    // Cache resolved contexts per parent id so results sharing a parent
    // clone the cached context instead of re-resolving it.
    let mut resolved_parents: HashMap<String, ParentContext> = HashMap::new();
    for result in results {
        let UnifiedResult::Code(sr) = result;
        let parent_id = match &sr.chunk.parent_id {
            Some(id) => id,
            None => continue,
        };
        if let Some(cached) = resolved_parents.get(parent_id) {
            parents.insert(sr.chunk.id.clone(), cached.clone());
            continue;
        }
        if let Some(parent) = stored_parents.get(parent_id) {
            let ctx = ParentContext {
                name: parent.name.clone(),
                content: parent.content.clone(),
                line_start: parent.line_start,
                line_end: parent.line_end,
            };
            resolved_parents.insert(parent_id.clone(), ctx.clone());
            parents.insert(sr.chunk.id.clone(), ctx);
        } else {
            // Fallback: read the chunk's span from disk.
            // NOTE(review): this uses the child chunk's own name and line
            // range, not the missing parent's — presumably a deliberate
            // approximation; confirm.
            let abs_path = root.join(&sr.chunk.file);
            let canonical = match dunce::canonicalize(&abs_path) {
                Ok(p) => p,
                Err(_) => continue,
            };
            // Refuse to read files resolving outside the project root
            // (e.g. via symlinks or `..` components in the stored path).
            let canonical_root = dunce::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
            if !canonical.starts_with(&canonical_root) {
                tracing::warn!(
                    path = %sr.chunk.file.display(),
                    "Path escapes project root, skipping parent context"
                );
                continue;
            }
            match std::fs::read_to_string(&canonical) {
                Ok(content) => {
                    let lines: Vec<&str> = content.lines().collect();
                    // Line numbers are 1-based; clamp the end to file length.
                    let start = sr.chunk.line_start.saturating_sub(1) as usize;
                    let end = (sr.chunk.line_end as usize).min(lines.len());
                    if start < end {
                        let parent_content = lines[start..end].join("\n");
                        let ctx = ParentContext {
                            name: sr.chunk.name.clone(),
                            content: parent_content,
                            line_start: sr.chunk.line_start,
                            line_end: sr.chunk.line_end,
                        };
                        resolved_parents.insert(parent_id.clone(), ctx.clone());
                        parents.insert(sr.chunk.id.clone(), ctx);
                    }
                }
                Err(e) => {
                    tracing::warn!(
                        path = %abs_path.display(),
                        error = %e,
                        "Failed to read source for parent context"
                    );
                }
            }
        }
    }
    parents
}