use std::io::IsTerminal;
use std::sync::Arc;
use anyhow::{Context, Result, bail};
use rqmd_core::Store;
use rqmd_core::llm::traits::Llm;
use rqmd_core::store::virtual_path::resolve_virtual_path;
use rqmd_core::store_ops::{
ExpandedQuery, ExpandedQueryType, HybridQueryOptions, SearchHooks, StructuredSearchOptions,
hybrid_query, structured_search,
};
use serde_json::json;
use crate::cli::QueryArgs;
use crate::collection_filter::{
filter_by_collections, resolve_collection_filter, single_collection,
};
use crate::color::Palette;
use crate::output::OutputFormat;
use crate::search_view::{
CliLinkCtx, ExplainView, Hit, editor_uri_template, hybrid_result_to_hit, print_hits,
to_qmd_path,
};
use crate::state::IndexState;
/// Run the `query` command: search the index and print the resulting hits.
///
/// The positional query words are joined with single spaces. If the joined
/// text parses as a structured query document (see `parse_structured_query`)
/// it is dispatched to `structured_search`; otherwise the raw text goes to
/// `hybrid_query`.
///
/// # Errors
///
/// Propagates query-document parse errors, failures to obtain the config,
/// LLM or store from `state`, search failures (wrapped with a short context
/// message), JSON serialization failures, and errors from `print_hits`.
pub async fn run(args: QueryArgs, state: &mut IndexState, p: &Palette) -> Result<()> {
let q = args.query.join(" ");
let fmt = OutputFormat::from(&args.format);
let parsed = parse_structured_query(&q)?;
// A non-empty intent flag takes precedence over an `intent:` line in the
// query document.
let intent = resolve_query_intent(
args.intent.as_deref(),
parsed.as_ref().and_then(|pq| pq.intent.as_deref()),
);
// `all` overrides any explicit limit with a fixed cap of 500; otherwise the
// limit falls back to 10 when none was given.
let limit = Some(if args.flags.all {
500
} else {
args.flags.limit.unwrap_or(10)
});
let min_score = Some(args.flags.min_score.unwrap_or(0.0));
let index_name = state.index_name().to_string();
// `idx` is only set when the index is not named "index".
// NOTE(review): presumably "index" is the default index name — confirm.
let idx = (index_name != "index").then_some(index_name.as_str());
// Text handed to `print_hits` as the query: the first lex search of a
// structured query, else its first vec search, else the raw query text.
let display_query = parsed
.as_ref()
.map(|pq| {
pq.searches
.iter()
.find(|s| s.type_ == ExpandedQueryType::Lex)
.or_else(|| {
pq.searches
.iter()
.find(|s| s.type_ == ExpandedQueryType::Vec)
})
.map(|s| s.query.clone())
.unwrap_or_else(|| q.clone())
})
.unwrap_or_else(|| q.clone());
let collection_names =
resolve_collection_filter(state.config_mut()?, &args.flags.collection, true)?;
let single = single_collection(&collection_names);
let link = CliLinkCtx {
editor_template: editor_uri_template(state.config_mut()?.data().editor_uri.as_deref()),
stdout_tty: std::io::stdout().is_terminal(),
};
// The LLM handle is fetched before the store: `store` keeps borrowing
// `state` for the rest of the function.
let llm = state.llama_cpp()?;
let store: &Store = state.store_mut()?;
let llm_dyn: Arc<dyn Llm> = llm;
let results = if let Some(pq) = &parsed {
log_structured_summary(pq, intent.as_deref(), fmt, p);
structured_search(
store,
llm_dyn,
&pq.searches,
StructuredSearchOptions {
collections: single.clone().map(|c| vec![c]),
limit,
min_score,
candidate_limit: args.candidate_limit,
explain: args.explain,
intent,
skip_rerank: args.no_rerank,
chunk_strategy: args.chunk_strategy.map(Into::into),
hooks: build_structured_hooks(fmt),
},
)
.await
.context("structured query failed")?
} else {
let opts = HybridQueryOptions {
collection: single.clone(),
limit,
min_score,
candidate_limit: args.candidate_limit,
explain: args.explain,
intent,
skip_rerank: args.no_rerank,
chunk_strategy: args.chunk_strategy.map(Into::into),
hooks: build_query_hooks(fmt),
};
hybrid_query(store, llm_dyn, &q, opts)
.await
.context("query failed")?
};
// The search itself only receives `single`; the full list of collection
// names is applied here, keyed on each result's `file`.
let results = filter_by_collections(results, &collection_names, |r| r.file.as_str());
let mut hits: Vec<Hit> = results
.iter()
.map(|r| hybrid_result_to_hit(r, args.flags.full, idx))
.collect();
if fmt == OutputFormat::Json && args.explain {
// JSON + explain: emit one object holding the hits and an explain entry
// for every result that carries explain data.
let explains: Vec<ExplainView<'_>> = results
.iter()
.filter_map(|r| {
r.explain
.as_ref()
.map(|e| ExplainView::new(to_qmd_path(&r.display_path, idx), e))
})
.collect();
let s = serde_json::to_string_pretty(&json!({
"hits": hits,
"explain": explains,
}))?;
println!("{s}");
} else {
if fmt == OutputFormat::Cli && link.stdout_tty {
// Best effort: a lookup error or a missing path leaves `abs_path` as
// `None` instead of failing the command.
// NOTE(review): the value returned by `with_connection` is dropped
// here — confirm it cannot carry an error worth reporting.
store.with_connection(|conn| {
for h in &mut hits {
h.abs_path = resolve_virtual_path(conn, &h.file)
.ok()
.flatten()
.map(|pp| pp.to_string_lossy().into_owned());
}
});
}
print_hits(
&hits,
fmt,
p,
args.flags.line_numbers,
&display_query,
&link,
)?;
}
Ok(())
}
/// Result of parsing a query document that contains at least one typed
/// (`lex:` / `vec:` / `hyde:`) line.
#[derive(Debug)]
struct ParsedStructuredQuery {
/// Typed searches, in the order their lines appear in the document.
searches: Vec<ExpandedQuery>,
/// Text of the document's `intent:` line, if it has one.
intent: Option<String>,
}
/// Pick the intent text to search with.
///
/// A non-empty `flag` always wins. An absent or empty `flag` defers to
/// `parsed`. A whitespace-only `flag` is not empty and is returned as-is.
fn resolve_query_intent(flag: Option<&str>, parsed: Option<&str>) -> Option<String> {
    let chosen = match flag {
        Some(text) if !text.is_empty() => Some(text),
        _ => parsed,
    };
    chosen.map(String::from)
}
/// Parse `query` as a query document: one `lex:`, `vec:`, `hyde:` or
/// `intent:` line per non-blank line, with case-insensitive prefixes.
///
/// Returns `Ok(None)` when the input is not a structured query and should be
/// treated as plain text: blank input, a single line without a known prefix,
/// or a single `expand:` line that carries text. Returns `Ok(Some(_))` when
/// at least one typed line was found; each search records the 1-based line
/// number it came from, counted over the raw input including blank lines.
///
/// # Errors
///
/// Fails when:
/// - an `expand:` line is mixed with other lines, or has no text;
/// - more than one `intent:` line is present, an `intent:` line has no text,
///   or `intent:` is the only kind of line in the document;
/// - a typed line has no text, or its text contains a carriage return;
/// - a multi-line document contains a line without a known prefix.
fn parse_structured_query(query: &str) -> Result<Option<ParsedStructuredQuery>> {
    const EXPAND_PREFIX: &str = "expand:";
    const INTENT_PREFIX: &str = "intent:";

    // Number the lines before dropping blank ones so error messages point at
    // the line the user actually typed.
    let lines: Vec<(usize, &str)> = query
        .split('\n')
        .enumerate()
        .map(|(idx, line)| (idx + 1, line.trim()))
        .filter(|(_, text)| !text.is_empty())
        .collect();
    if lines.is_empty() {
        return Ok(None);
    }
    let single_line = lines.len() == 1;

    let mut searches: Vec<ExpandedQuery> = Vec::new();
    let mut intent: Option<String> = None;

    for &(number, trimmed) in &lines {
        // ASCII-only lowering keeps every byte offset and char boundary of
        // `trimmed`, so a prefix found in `lower` can be sliced off `trimmed`
        // by length. Full Unicode lowercasing gives no such guarantee.
        let lower = trimmed.to_ascii_lowercase();

        if lower.starts_with(EXPAND_PREFIX) {
            if !single_line {
                bail!(
                    "Line {number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead."
                );
            }
            if trimmed[EXPAND_PREFIX.len()..].trim().is_empty() {
                bail!("expand: query must include text.");
            }
            // A lone expand query is handled as a plain (unstructured) query.
            return Ok(None);
        }

        if lower.starts_with(INTENT_PREFIX) {
            if intent.is_some() {
                bail!("Line {number}: only one intent: line is allowed per query document.");
            }
            let text = trimmed[INTENT_PREFIX.len()..].trim();
            if text.is_empty() {
                bail!("Line {number}: intent: must include text.");
            }
            intent = Some(text.to_string());
            continue;
        }

        if let Some((type_, prefix)) = match_prefix(&lower) {
            // Prefix without its trailing colon, for error messages.
            let name = prefix.strip_suffix(':').unwrap_or(prefix);
            let text = trimmed[prefix.len()..].trim();
            if text.is_empty() {
                bail!("Line {number} ({name}:) must include text.");
            }
            // Lines were split on '\n' and trimmed, so only an interior '\r'
            // can reach this check.
            if text.contains(['\r', '\n']) {
                bail!(
                    "Line {number} ({name}:) contains a newline. Keep each query on a single line."
                );
            }
            searches.push(ExpandedQuery {
                type_,
                query: text.to_string(),
                line: Some(number),
            });
            continue;
        }

        // An unprefixed line is only acceptable when it is the whole query.
        if single_line {
            return Ok(None);
        }
        bail!(
            "Line {number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one."
        );
    }

    if intent.is_some() && searches.is_empty() {
        bail!("intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.");
    }
    Ok(if searches.is_empty() {
        None
    } else {
        Some(ParsedStructuredQuery { searches, intent })
    })
}
/// Identify the typed-search prefix at the start of an already lowercased line.
///
/// Returns the search type together with the matched prefix (colon
/// included), or `None` when the line starts with none of `lex:`, `vec:` or
/// `hyde:`. The comparison is case-sensitive; callers lowercase first.
fn match_prefix(lower: &str) -> Option<(ExpandedQueryType, &'static str)> {
    let known: [(ExpandedQueryType, &'static str); 3] = [
        (ExpandedQueryType::Lex, "lex:"),
        (ExpandedQueryType::Vec, "vec:"),
        (ExpandedQueryType::Hyde, "hyde:"),
    ];
    for (kind, prefix) in known {
        if lower.starts_with(prefix) {
            return Some((kind, prefix));
        }
    }
    None
}
/// Short lowercase name of a search type, as used in the structured-search
/// summary (`lex`, `vec`, `hyde`).
fn type_label(t: ExpandedQueryType) -> &'static str {
    // Kept as an exhaustive match so a new variant fails to compile here
    // instead of silently getting a wrong label.
    match t {
        ExpandedQueryType::Hyde => "hyde",
        ExpandedQueryType::Vec => "vec",
        ExpandedQueryType::Lex => "lex",
    }
}
/// Write a tree-style summary of a structured query to stderr.
///
/// Does nothing unless `fmt` is `OutputFormat::Cli`. The summary lists the
/// number and types of searches, the intent (when given), and a one-line
/// preview of every search; previews longer than 72 characters are cut to
/// 69 characters followed by `...`.
fn log_structured_summary(
    pq: &ParsedStructuredQuery,
    intent: Option<&str>,
    fmt: OutputFormat,
    p: &Palette,
) {
    const PREVIEW_MAX_CHARS: usize = 72;
    const PREVIEW_KEPT_CHARS: usize = 69;

    if fmt != OutputFormat::Cli {
        return;
    }

    // Flatten to one line, then shorten by character count (not bytes).
    let preview_of = |text: &str| -> String {
        let flat = text.replace('\n', " ");
        if flat.chars().nth(PREVIEW_MAX_CHARS).is_none() {
            return flat;
        }
        let mut short: String = flat.chars().take(PREVIEW_KEPT_CHARS).collect();
        short.push_str("...");
        short
    };

    let kinds = pq
        .searches
        .iter()
        .map(|search| type_label(search.type_))
        .collect::<Vec<&str>>()
        .join("+");
    eprintln!(
        "{}Structured search: {} queries ({}){}",
        p.dim(),
        pq.searches.len(),
        kinds,
        p.reset()
    );

    if let Some(i) = intent {
        eprintln!("{}├─ intent: {i}{}", p.dim(), p.reset());
    }

    pq.searches.iter().for_each(|search| {
        let preview = preview_of(&search.query);
        eprintln!(
            "{}├─ {}: {preview}{}",
            p.dim(),
            type_label(search.type_),
            p.reset()
        );
    });

    eprintln!("{}└─ Searching...{}", p.dim(), p.reset());
}
fn build_query_hooks(fmt: OutputFormat) -> SearchHooks {
if fmt != OutputFormat::Cli {
return SearchHooks::default();
}
SearchHooks {
on_strong_signal: Some(Arc::new(|top| {
eprintln!("Strong BM25 signal ({top:.2}) — skipping expansion");
})),
on_expand_start: Some(Arc::new(|| {
eprintln!("Expanding query...");
})),
on_expand: Some(Arc::new(|orig, expanded: &[ExpandedQuery], ms| {
eprintln!("Expanded \"{orig}\" -> {} queries ({ms}ms)", expanded.len());
for e in expanded {
eprintln!(" [{:?}] {}", e.type_, e.query);
}
})),
on_embed_start: Some(Arc::new(|n| {
eprintln!("Embedding {n} queries...");
})),
on_embed_done: Some(Arc::new(|ms| {
eprintln!(" embedded ({ms}ms)");
})),
on_rerank_start: Some(Arc::new(|n| {
eprintln!("Reranking {n} candidates...");
})),
on_rerank_done: Some(Arc::new(|ms| {
eprintln!(" reranked ({ms}ms)");
})),
}
}
/// Build the progress hooks for a structured query.
///
/// With `OutputFormat::Cli`, embedding and reranking progress is written to
/// stderr and every other hook keeps its default. Any other format gets the
/// default hooks only.
fn build_structured_hooks(fmt: OutputFormat) -> SearchHooks {
    if fmt == OutputFormat::Cli {
        SearchHooks {
            on_rerank_start: Some(Arc::new(|n| eprintln!("Reranking {n} candidates..."))),
            on_rerank_done: Some(Arc::new(|ms| eprintln!(" reranked ({ms}ms)"))),
            on_embed_start: Some(Arc::new(|n| eprintln!("Embedding {n} queries..."))),
            on_embed_done: Some(Arc::new(|ms| eprintln!(" embedded ({ms}ms)"))),
            ..SearchHooks::default()
        }
    } else {
        SearchHooks::default()
    }
}
/// Unit tests for intent resolution and query-document parsing.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for `parse_structured_query`.
    fn parse(q: &str) -> Result<Option<ParsedStructuredQuery>> {
        parse_structured_query(q)
    }

    // ---- resolve_query_intent ----

    #[test]
    fn intent_empty_flag_falls_through_to_parsed() {
        assert_eq!(
            resolve_query_intent(Some(""), Some("parsed")),
            Some("parsed".to_string())
        );
    }

    #[test]
    fn intent_empty_flag_and_no_parsed_is_none() {
        assert_eq!(resolve_query_intent(Some(""), None), None);
    }

    #[test]
    fn intent_flag_wins_over_parsed() {
        assert_eq!(
            resolve_query_intent(Some("flag"), Some("parsed")),
            Some("flag".to_string())
        );
    }

    #[test]
    fn intent_whitespace_flag_is_kept() {
        assert_eq!(
            resolve_query_intent(Some(" "), Some("parsed")),
            Some(" ".to_string())
        );
    }

    #[test]
    fn intent_no_flag_uses_parsed() {
        assert_eq!(
            resolve_query_intent(None, Some("parsed")),
            Some("parsed".to_string())
        );
    }

    #[test]
    fn intent_no_flag_no_parsed_is_none() {
        assert_eq!(resolve_query_intent(None, None), None);
    }

    #[test]
    fn intent_flag_only_is_kept() {
        assert_eq!(
            resolve_query_intent(Some("flag"), None),
            Some("flag".to_string())
        );
    }

    // ---- parse_structured_query: unstructured input and expand: ----

    #[test]
    fn plain_single_line_is_not_structured() {
        assert!(parse("CAP theorem").unwrap().is_none());
    }

    #[test]
    fn blank_query_is_not_structured() {
        assert!(parse(" \n ").unwrap().is_none());
    }

    #[test]
    fn single_expand_is_not_structured() {
        assert!(parse("expand: auth stuff").unwrap().is_none());
    }

    #[test]
    fn expand_mixed_with_typed_errors() {
        let err = parse("expand: question\nlex: keywords").unwrap_err();
        assert!(err.to_string().contains("cannot mix expand"));
    }

    #[test]
    fn expand_mixed_with_intent_errors() {
        let err = parse("intent: web\nexpand: performance").unwrap_err();
        assert!(err.to_string().contains("cannot mix expand"));
    }

    #[test]
    fn empty_expand_errors() {
        let err = parse("expand: ").unwrap_err();
        assert!(err.to_string().contains("expand: query must include text"));
    }

    #[test]
    fn empty_expand_with_typed_reports_mix_first() {
        let err = parse("expand:\nlex: x").unwrap_err();
        assert!(err.to_string().contains("cannot mix expand"));
    }

    // ---- parse_structured_query: typed lines and intent ----

    #[test]
    fn parses_prefixes_and_intent() {
        let parsed = parse("lex: auth\nvec: secure sessions\nintent: security")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches.len(), 2);
        assert_eq!(parsed.searches[0].type_, ExpandedQueryType::Lex);
        assert_eq!(parsed.searches[0].query, "auth");
        assert_eq!(parsed.searches[1].type_, ExpandedQueryType::Vec);
        assert_eq!(parsed.searches[1].query, "secure sessions");
        assert_eq!(parsed.intent.as_deref(), Some("security"));
    }

    #[test]
    fn intent_after_typed_lines() {
        let parsed = parse("lex: performance\nintent: web page load times\nvec: latency")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches.len(), 2);
        assert_eq!(parsed.intent.as_deref(), Some("web page load times"));
    }

    #[test]
    fn prefix_is_case_insensitive() {
        let parsed = parse("HYDE: expanded text").unwrap().expect("structured");
        assert_eq!(parsed.searches[0].type_, ExpandedQueryType::Hyde);
        assert_eq!(parsed.searches[0].query, "expanded text");
    }

    #[test]
    fn intent_is_case_insensitive() {
        let parsed = parse("Intent: foo\nlex: bar").unwrap().expect("structured");
        assert_eq!(parsed.intent.as_deref(), Some("foo"));
    }

    #[test]
    fn duplicate_intent_errors() {
        let err = parse("intent: a\nintent: b").unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("only one intent:"));
        assert!(msg.contains("query document"));
    }

    #[test]
    fn intent_only_errors() {
        let err = parse("intent: security").unwrap_err();
        assert!(err.to_string().contains("intent: cannot appear alone"));
    }

    #[test]
    fn empty_intent_errors() {
        let err = parse("intent: \nlex: x").unwrap_err();
        assert!(err.to_string().contains("intent: must include text"));
    }

    #[test]
    fn multiline_missing_prefix_errors() {
        let err = parse("lex: auth\nplain line").unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("missing a lex:/vec:/hyde:/intent: prefix"));
        assert!(msg.contains("query document"));
    }

    #[test]
    fn empty_prefix_text_errors() {
        let err = parse("lex: \nvec: x").unwrap_err();
        assert!(err.to_string().contains("(lex:) must include text"));
    }

    #[test]
    fn blank_lines_are_skipped() {
        let parsed = parse("\nlex: auth\n\nvec: sessions\n")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches.len(), 2);
        assert_eq!(parsed.searches[0].line, Some(2));
        assert_eq!(parsed.searches[1].line, Some(4));
    }

    #[test]
    fn colon_in_text_is_preserved() {
        let parsed = parse("lex: time: 12:30 PM").unwrap().expect("structured");
        assert_eq!(parsed.searches[0].query, "time: 12:30 PM");
    }

    #[test]
    fn surrounding_whitespace_trimmed() {
        let parsed = parse(" lex: spaced query ")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches[0].query, "spaced query");
    }

    #[test]
    fn intent_parses_lex_query() {
        let parsed = parse("intent: web performance\nlex: performance")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.intent.as_deref(), Some("web performance"));
        assert_eq!(parsed.searches.len(), 1);
        assert_eq!(parsed.searches[0].type_, ExpandedQueryType::Lex);
        assert_eq!(parsed.searches[0].query, "performance");
    }

    #[test]
    fn intent_parses_multiple_typed_lines() {
        let parsed =
            parse("intent: web page load times\nlex: performance\nvec: how to improve performance")
                .unwrap()
                .expect("structured");
        assert_eq!(parsed.intent.as_deref(), Some("web page load times"));
        assert_eq!(parsed.searches.len(), 2);
        assert_eq!(parsed.searches[0].type_, ExpandedQueryType::Lex);
        assert_eq!(parsed.searches[1].type_, ExpandedQueryType::Vec);
    }

    #[test]
    fn intent_case_insensitive_prefix() {
        let parsed = parse("Intent: web perf\nlex: performance")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.intent.as_deref(), Some("web perf"));
    }

    #[test]
    fn intent_no_intent_returns_none() {
        let parsed = parse("lex: performance\nvec: speed")
            .unwrap()
            .expect("structured");
        assert!(parsed.intent.is_none());
    }

    #[test]
    fn intent_alone_throws() {
        let err = parse("intent: web performance").unwrap_err();
        assert!(err.to_string().contains("intent: cannot appear alone"));
    }

    #[test]
    fn intent_multiple_lines_throw() {
        let err = parse("intent: web perf\nintent: team health\nlex: performance").unwrap_err();
        assert!(err.to_string().contains("only one intent: line is allowed"));
    }

    #[test]
    fn intent_empty_text_throws() {
        let err = parse("intent:\nlex: performance").unwrap_err();
        assert!(err.to_string().contains("intent: must include text"));
    }

    #[test]
    fn intent_whitespace_only_throws() {
        let err = parse("intent: \nlex: performance").unwrap_err();
        assert!(err.to_string().contains("intent: must include text"));
    }

    #[test]
    fn intent_single_plain_line_returns_none() {
        assert!(parse("how does auth work").unwrap().is_none());
    }

    #[test]
    fn intent_empty_query_returns_none() {
        assert!(parse("").unwrap().is_none());
        assert!(parse(" \n \n ").unwrap().is_none());
    }

    #[test]
    fn intent_with_blank_lines_ok() {
        let parsed = parse("intent: web perf\n\nlex: performance\n\nvec: speed")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.intent.as_deref(), Some("web perf"));
        assert_eq!(parsed.searches.len(), 2);
    }

    #[test]
    fn intent_preserves_full_text_including_colons() {
        let parsed = parse("intent: web performance: LCP, FID, CLS\nlex: performance")
            .unwrap()
            .expect("structured");
        assert_eq!(
            parsed.intent.as_deref(),
            Some("web performance: LCP, FID, CLS")
        );
    }

    #[test]
    fn all_three_types() {
        let parsed = parse("lex: keywords\nvec: question\nhyde: hypothetical doc")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches.len(), 3);
        assert_eq!(parsed.searches[0].type_, ExpandedQueryType::Lex);
        assert_eq!(parsed.searches[0].query, "keywords");
        assert_eq!(parsed.searches[1].type_, ExpandedQueryType::Vec);
        assert_eq!(parsed.searches[1].query, "question");
        assert_eq!(parsed.searches[2].type_, ExpandedQueryType::Hyde);
        assert_eq!(parsed.searches[2].query, "hypothetical doc");
    }

    #[test]
    fn duplicate_types_allowed() {
        let parsed = parse("lex: term1\nlex: term2\nlex: term3")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches.len(), 3);
        assert!(
            parsed
                .searches
                .iter()
                .all(|s| s.type_ == ExpandedQueryType::Lex)
        );
        assert_eq!(parsed.searches[0].query, "term1");
        assert_eq!(parsed.searches[1].query, "term2");
        assert_eq!(parsed.searches[2].query, "term3");
    }

    #[test]
    fn order_preserved() {
        let parsed = parse("hyde: passage\nvec: question\nlex: keywords")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches[0].type_, ExpandedQueryType::Hyde);
        assert_eq!(parsed.searches[1].type_, ExpandedQueryType::Vec);
        assert_eq!(parsed.searches[2].type_, ExpandedQueryType::Lex);
    }

    #[test]
    fn uppercase_lex_and_vec_prefixes() {
        let lex = parse("LEX: keywords").unwrap().expect("structured");
        assert_eq!(lex.searches[0].type_, ExpandedQueryType::Lex);
        assert_eq!(lex.searches[0].query, "keywords");
        let vec = parse("VEC: question").unwrap().expect("structured");
        assert_eq!(vec.searches[0].type_, ExpandedQueryType::Vec);
        assert_eq!(vec.searches[0].query, "question");
    }

    #[test]
    fn mixed_case_prefixes() {
        let lex = parse("Lex: test").unwrap().expect("structured");
        assert_eq!(lex.searches[0].type_, ExpandedQueryType::Lex);
        let vec = parse("VeC: test").unwrap().expect("structured");
        assert_eq!(vec.searches[0].type_, ExpandedQueryType::Vec);
    }

    #[test]
    fn internal_whitespace_preserved() {
        // The input needs a run of interior spaces; otherwise this only
        // repeats the trimming test.
        let parsed = parse("lex:   multiple   spaces   ")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches[0].query, "multiple   spaces");
    }

    #[test]
    fn prefix_like_text_in_query_preserved() {
        let parsed = parse("vec: what does lex: mean")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches[0].type_, ExpandedQueryType::Vec);
        assert_eq!(parsed.searches[0].query, "what does lex: mean");
    }

    #[test]
    fn multiple_plain_lines_error() {
        let err = parse("line one\nline two").unwrap_err();
        assert!(err.to_string().contains("missing a lex:/vec:/hyde:"));
    }

    #[test]
    fn three_plain_lines_error() {
        let err = parse("a\nb\nc").unwrap_err();
        assert!(err.to_string().contains("missing a lex:/vec:/hyde:"));
    }

    #[test]
    fn newline_in_hyde_passage_single_line() {
        let parsed = parse("hyde: The answer is X. It means Y.")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches[0].type_, ExpandedQueryType::Hyde);
        assert_eq!(parsed.searches[0].query, "The answer is X. It means Y.");
    }

    // ---- parse_structured_query: line endings and non-ASCII text ----

    #[test]
    fn carriage_return_inside_typed_line_errors() {
        // A bare '\r' in the middle of a line survives the split on '\n' and
        // the trim, so it must be rejected explicitly.
        let err = parse("lex: first\rsecond").unwrap_err();
        assert!(err.to_string().contains("(lex:) contains a newline"));
    }

    #[test]
    fn crlf_line_endings_are_accepted() {
        let parsed = parse("lex: auth\r\nvec: sessions\r\n")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches.len(), 2);
        assert_eq!(parsed.searches[0].query, "auth");
        assert_eq!(parsed.searches[1].query, "sessions");
    }

    #[test]
    fn non_ascii_text_is_preserved() {
        let parsed = parse("VEC: naïve café\nintent: résumé")
            .unwrap()
            .expect("structured");
        assert_eq!(parsed.searches[0].query, "naïve café");
        assert_eq!(parsed.intent.as_deref(), Some("résumé"));
    }
}