use anyhow::Result;
use colored::Colorize;
use cqs::{
gather, gather_cross_index_with_index, normalize_path, GatherDirection, GatherOptions,
GatherResult,
};
use crate::cli::staleness;
/// Serializable payload for the `gather` command's JSON output mode.
#[derive(Debug, serde::Serialize)]
pub(crate) struct GatherOutput {
    /// The search query, echoed back verbatim.
    pub query: String,
    /// Gathered chunks pre-serialized to JSON values (chunks that fail to
    /// serialize are dropped with a warning at build time).
    pub chunks: Vec<serde_json::Value>,
    /// True when call-graph expansion stopped early at its node cap.
    pub expansion_capped: bool,
    /// True when the underlying search degraded; results may be incomplete.
    pub search_degraded: bool,
    /// Tokens actually consumed by the returned chunks; present only when a
    /// token budget was in effect (omitted from JSON otherwise).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub token_count: Option<usize>,
    /// The requested token budget; present only alongside `token_count`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub token_budget: Option<usize>,
}
pub(crate) fn build_gather_output(
result: &GatherResult,
query: &str,
token_info: Option<(usize, usize)>,
) -> GatherOutput {
let _span = tracing::info_span!("build_gather_output", query_len = query.len()).entered();
let json_chunks: Vec<serde_json::Value> = result
.chunks
.iter()
.filter_map(|c| match serde_json::to_value(c) {
Ok(v) => Some(v),
Err(e) => {
tracing::warn!(error = %e, chunk = %c.name, "Failed to serialize chunk");
None
}
})
.collect();
GatherOutput {
query: query.to_string(),
chunks: json_chunks,
expansion_capped: result.expansion_capped,
search_degraded: result.search_degraded,
token_count: token_info.map(|(used, _)| used),
token_budget: token_info.map(|(_, budget)| budget),
}
}
/// Bundled arguments for `cmd_gather`, grouping the shared CLI context with
/// the user-supplied gather options.
pub(crate) struct GatherContext<'a> {
    /// Shared command context (store, root, cqs dir, embedder, CLI flags).
    pub ctx: &'a crate::cli::CommandContext<'a>,
    /// Free-text search query.
    pub query: &'a str,
    /// Requested call-graph expansion depth (clamped to 0..=5 at use site).
    pub expand: usize,
    /// Graph traversal direction used during expansion.
    pub direction: GatherDirection,
    /// Maximum number of chunks to return (raised internally to at least 50
    /// when a token budget is set, so the packer has enough candidates).
    pub limit: usize,
    /// Optional token budget; when set, results are packed to fit it.
    pub max_tokens: Option<usize>,
    /// Optional reference-index name; triggers cross-index gathering.
    pub ref_name: Option<&'a str>,
    /// Emit machine-readable JSON instead of human-readable text.
    pub json: bool,
}
/// Execute the `gather` command: semantically search for code relevant to
/// `gctx.query`, expand along the call graph, optionally pack results into a
/// token budget, and print them as JSON or human-readable text.
///
/// # Errors
/// Propagates failures from embedder construction, query embedding,
/// reference resolution, index building, the gather itself, and JSON
/// serialization of the final output.
pub(crate) fn cmd_gather(gctx: &GatherContext<'_>) -> Result<()> {
    // Unpack the context up front so the rest of the function reads flat.
    let ctx = gctx.ctx;
    let query = gctx.query;
    let expand = gctx.expand;
    let direction = gctx.direction;
    let limit = gctx.limit;
    let max_tokens = gctx.max_tokens;
    let ref_name = gctx.ref_name;
    let json = gctx.json;
    let _span = tracing::info_span!(
        "cmd_gather",
        query_len = query.len(),
        expand,
        limit,
        ?max_tokens,
        ?ref_name
    )
    .entered();
    let store = &ctx.store;
    let root = &ctx.root;
    let cqs_dir = &ctx.cqs_dir;
    let embedder = ctx.embedder()?;
    // When a token budget is set, over-fetch (at least 50 candidates) so the
    // packing step below has enough material to fill the budget.
    let fetch_limit = if max_tokens.is_some() {
        limit.max(50)
    } else {
        limit
    };
    let opts = GatherOptions {
        // Expansion depth is hard-capped at 5 regardless of the request.
        expand_depth: expand.clamp(0, 5),
        direction,
        limit: fetch_limit,
        ..GatherOptions::default()
    };
    // Cross-index gather (against a named reference index) when a reference
    // name was supplied; plain project gather otherwise.
    let mut result = if let Some(rn) = ref_name {
        let query_embedding = embedder.embed_query(query)?;
        let ref_idx = crate::cli::commands::resolve::find_reference(root, rn)?;
        let index = crate::cli::build_vector_index(store, cqs_dir)?;
        gather_cross_index_with_index(
            store,
            &ref_idx,
            &query_embedding,
            query,
            &opts,
            root,
            index.as_deref(),
        )?
    } else {
        gather(store, embedder, query, &opts, root)?
    };
    // Token-budget packing: trim gathered chunks to fit `budget` tokens, then
    // re-sort for stable display. Yields the token count actually used.
    let token_count_used = if let Some(budget) = max_tokens {
        // JSON output carries per-result framing overhead; account for it so
        // the packed payload still fits the budget after serialization.
        let overhead = if json {
            crate::cli::commands::JSON_OVERHEAD_PER_RESULT
        } else {
            0
        };
        let chunks = std::mem::take(&mut result.chunks);
        let (mut packed, used) =
            crate::cli::commands::pack_gather_chunks(chunks, embedder, budget, overhead);
        packed.sort_by(|a, b| {
            // Reference-sourced chunks (source = Some) sort before project
            // chunks (source = None); note different reference names are NOT
            // distinguished here, only presence vs absence of a source.
            let source_ord = match (&a.source, &b.source) {
                (Some(_), None) => std::cmp::Ordering::Less,
                (None, Some(_)) => std::cmp::Ordering::Greater,
                _ => std::cmp::Ordering::Equal,
            };
            source_ord
                .then(a.file.cmp(&b.file))
                .then(a.line_start.cmp(&b.line_start))
                .then(a.name.cmp(&b.name))
        });
        result.chunks = packed;
        Some(used)
    } else {
        None
    };
    // Staleness warning for project-local result files, skipped under quiet
    // mode, --no-stale-check, or when there are no results.
    if !ctx.cli.quiet && !ctx.cli.no_stale_check && !result.chunks.is_empty() {
        // Deduplicate origin file paths via a HashSet; reference chunks
        // (source = Some) are excluded since staleness applies only to the
        // local project.
        let origins: Vec<&str> = result
            .chunks
            .iter()
            .filter(|c| c.source.is_none()) .filter_map(|c| c.file.to_str())
            .collect::<std::collections::HashSet<_>>()
            .into_iter()
            .collect();
        if !origins.is_empty() {
            staleness::warn_stale_results(store, &origins, root);
        }
    }
    if json {
        // token_count_used is only Some when max_tokens was Some (see above),
        // so the unwrap_or(0) fallback is effectively unreachable.
        let token_info = token_count_used.map(|used| (used, max_tokens.unwrap_or(0)));
        let output = build_gather_output(&result, query, token_info);
        println!("{}", serde_json::to_string_pretty(&output)?);
    } else if result.chunks.is_empty() {
        println!("No relevant code found for: {}", query);
    } else {
        // Human-readable rendering: summary line, warnings, then chunks
        // grouped by source section (cross-index only) and by file.
        let token_info = match (token_count_used, max_tokens) {
            (Some(used), Some(budget)) => format!(" ({} of {} tokens)", used, budget),
            _ => String::new(),
        };
        let ref_label = ref_name
            .map(|rn| format!(" (cross-index via '{}')", rn))
            .unwrap_or_default();
        println!(
            "Gathered {} chunk{}{}{} for: {}",
            result.chunks.len(),
            if result.chunks.len() == 1 { "" } else { "s" },
            ref_label,
            token_info,
            query.cyan(),
        );
        if result.expansion_capped {
            println!("{}", "Warning: expansion capped at 200 nodes".yellow());
        }
        if result.search_degraded {
            println!(
                "{}",
                "Warning: batch name search failed, results may be incomplete".yellow()
            );
        }
        println!();
        let is_cross_index = ref_name.is_some();
        let mut current_file = String::new();
        let mut current_source: Option<String> = None;
        for chunk in &result.chunks {
            // In cross-index mode, print a section header whenever the source
            // (reference name, or the project itself) changes.
            if is_cross_index {
                let source_label = chunk.source.as_deref().unwrap_or("project").to_string();
                if Some(&source_label) != current_source.as_ref() {
                    if current_source.is_some() {
                        println!();
                    }
                    if chunk.source.is_some() {
                        println!("=== Reference: {} ===", source_label.yellow());
                    } else {
                        println!("=== Project ===");
                    }
                    current_source = Some(source_label);
                    // Reset file grouping so the first file in the new
                    // section gets a fresh header.
                    current_file.clear();
                }
            }
            let file_str = normalize_path(&chunk.file);
            if file_str != current_file {
                if !current_file.is_empty() {
                    println!();
                }
                println!("--- {} ---", file_str.cyan());
                current_file = file_str;
            }
            // Depth 0 marks a direct search hit; in cross-index mode a
            // depth-0 project chunk is labeled a "bridge" between indexes.
            let depth_label = if chunk.depth == 0 {
                if is_cross_index {
                    if chunk.source.is_some() {
                        "ref seed".to_string()
                    } else {
                        "bridge".to_string()
                    }
                } else {
                    "seed".to_string()
                }
            } else {
                format!("depth {}", chunk.depth)
            };
            println!(
                " {} ({}:{}, {}, {:.3})",
                chunk.name.bold(),
                chunk.file.display(),
                chunk.line_start,
                depth_label,
                chunk.score,
            );
            println!(" {}", chunk.signature.dimmed());
        }
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap `chunks` in a `GatherResult` with both warning flags cleared.
    fn result_of(chunks: Vec<cqs::GatheredChunk>) -> GatherResult {
        GatherResult {
            chunks,
            expansion_capped: false,
            search_degraded: false,
        }
    }

    /// Minimal depth-0 Rust function chunk named `name` in src/lib.rs.
    fn chunk(name: &str) -> cqs::GatheredChunk {
        cqs::GatheredChunk {
            name: name.to_owned(),
            file: std::path::PathBuf::from("src/lib.rs"),
            line_start: 1,
            line_end: 10,
            language: cqs::parser::Language::Rust,
            chunk_type: cqs::parser::ChunkType::Function,
            signature: format!("fn {}()", name),
            content: "// body".to_owned(),
            score: 0.9,
            depth: 0,
            source: None,
        }
    }

    #[test]
    fn gather_output_empty() {
        let out = build_gather_output(&result_of(Vec::new()), "test query", None);
        assert_eq!(out.query, "test query");
        assert!(out.chunks.is_empty());
        assert!(!out.expansion_capped);
        assert!(!out.search_degraded);
        assert!(out.token_count.is_none());
        assert!(out.token_budget.is_none());
    }

    #[test]
    fn gather_output_with_chunks() {
        let input = result_of(vec![chunk("foo"), chunk("bar")]);
        let out = build_gather_output(&input, "find code", None);
        assert_eq!(out.chunks.len(), 2);
        assert_eq!(out.chunks[0]["name"], "foo");
        assert_eq!(out.chunks[1]["name"], "bar");
    }

    #[test]
    fn gather_output_with_token_info() {
        let out = build_gather_output(&result_of(vec![chunk("baz")]), "q", Some((150, 500)));
        assert_eq!(out.token_count, Some(150));
        assert_eq!(out.token_budget, Some(500));
    }

    #[test]
    fn gather_output_flags() {
        // Both warning flags set: they must survive into the output struct.
        let flagged = GatherResult {
            chunks: Vec::new(),
            expansion_capped: true,
            search_degraded: true,
        };
        let out = build_gather_output(&flagged, "q", None);
        assert!(out.expansion_capped);
        assert!(out.search_degraded);
    }

    #[test]
    fn gather_output_serializes() {
        let out = build_gather_output(&result_of(vec![chunk("x")]), "q", Some((100, 300)));
        let json = serde_json::to_value(&out).unwrap();
        assert_eq!(json["query"], "q");
        assert_eq!(json["token_count"], 100);
        assert_eq!(json["token_budget"], 300);
        assert!(json["chunks"].is_array());
    }

    #[test]
    fn gather_output_omits_tokens_when_none() {
        // With no token info, the optional fields must be absent, not null.
        let out = build_gather_output(&result_of(Vec::new()), "q", None);
        let json = serde_json::to_value(&out).unwrap();
        assert!(json.get("token_count").is_none());
        assert!(json.get("token_budget").is_none());
    }
}