use crate::core::indexer::CodeChunk;
/// Rough bytes-per-token ratio: `estimate_tokens` divides a byte count by this
/// value to obtain an approximate token count.
const CHARS_PER_TOKEN: usize = 4;
/// Default token budget exposed to callers.
///
/// Not referenced elsewhere in this section; presumably intended as the
/// `max_tokens` argument of `consolidate_results` (TODO confirm against callers).
pub const DEFAULT_MAX_TOKENS: usize = 4000;
/// Approximates how many tokens `chunk` occupies once rendered.
///
/// The body size is measured in bytes:
/// - with `full_content` set, the length of `chunk.content`;
/// - otherwise the length of `chunk.compact_snippet` when one is present, or
///   the length of `chunk.content` capped at 560 bytes when it is not.
///
/// The length of the file path plus a flat 80-byte allowance is added on top
/// (presumably covering the per-result heading and fences; TODO confirm), and
/// the sum is integer-divided by `CHARS_PER_TOKEN`.
fn estimate_tokens(chunk: &CodeChunk, full_content: bool) -> usize {
    let body_bytes = match (full_content, chunk.compact_snippet.as_deref()) {
        (true, _) => chunk.content.len(),
        (false, Some(snippet)) => snippet.len(),
        (false, None) => chunk.content.len().min(560),
    };
    let path_bytes = chunk.file.len();
    (body_bytes + path_bytes + 80) / CHARS_PER_TOKEN
}
/// Merges two chunks into one chunk spanning both line ranges.
///
/// The chunk with the higher score is the *primary* (`a` wins a tie; if the
/// comparison is false, e.g. because a score is NaN, `b` is primary). The
/// result takes its file, language, function name, score, match reason, chunk
/// type, calls, inheritance list, depth and index id from the primary.
///
/// * `start_line` / `end_line` are the minimum start and maximum end of both.
/// * `content` is whichever of the two bodies is longer in bytes (the
///   primary's on a tie); the bodies are not spliced together.
/// * `compact_snippet` is the primary's, falling back to the secondary's.
/// * `id` is rebuilt as `"{file}:{start_line}:{end_line}"`.
fn merge_chunks(a: CodeChunk, b: CodeChunk) -> CodeChunk {
    let (primary, secondary) = if a.score >= b.score { (a, b) } else { (b, a) };
    let start_line = primary.start_line.min(secondary.start_line);
    let end_line = primary.end_line.max(secondary.end_line);
    // Both inputs are owned and consumed here, so the chosen fields are moved
    // out instead of being deep-copied.
    let content = if secondary.content.len() > primary.content.len() {
        secondary.content
    } else {
        primary.content
    };
    let compact_snippet = primary.compact_snippet.or(secondary.compact_snippet);
    CodeChunk {
        // `primary.file` is only borrowed here; it is moved on the next line.
        id: format!("{}:{}:{}", primary.file, start_line, end_line),
        file: primary.file,
        language: primary.language,
        start_line,
        end_line,
        content,
        function_name: primary.function_name,
        score: primary.score,
        compact_snippet,
        match_reason: primary.match_reason,
        chunk_type: primary.chunk_type,
        calls: primary.calls,
        inherits_from: primary.inherits_from,
        chunk_depth: primary.chunk_depth,
        index_id: primary.index_id,
    }
}
/// Reports whether the inclusive ranges `a_start..=a_end` and
/// `b_start..=b_end` share at least one value.
///
/// Ranges that merely touch (one ends where the other starts) count as
/// overlapping.
fn ranges_overlap(a_start: usize, a_end: usize, b_start: usize, b_end: usize) -> bool {
    let a_begins_after_b_ends = a_start > b_end;
    let b_begins_after_a_ends = b_start > a_end;
    !(a_begins_after_b_ends || b_begins_after_a_ends)
}
/// De-duplicates, ranks and budget-trims a list of search hits.
///
/// 1. **Merge** – chunks are processed in input order. A chunk that shares a
///    file with, and overlaps the line range of, an already accepted entry is
///    folded into that entry via `merge_chunks`. Because merging can widen the
///    entry's range, any later entries that the widened range now reaches are
///    folded in as well, so the accepted entries never overlap each other.
/// 2. **Rank** – entries are stably sorted by descending `score`; scores that
///    cannot be compared (NaN) are treated as equal.
/// 3. **Trim** – entries are admitted in rank order while the running
///    `estimate_tokens` total stays within `max_tokens`. The top entry is
///    always admitted, even if it alone exceeds the budget. An entry that does
///    not fit is skipped rather than ending the scan, so a smaller,
///    lower-ranked entry may still be admitted.
///
/// `full_content` is forwarded to `estimate_tokens`.
///
/// Returns the admitted chunks and a flag that is `true` when at least one
/// merged entry was left out because of the budget.
pub fn consolidate_results(
    chunks: Vec<CodeChunk>,
    max_tokens: usize,
    full_content: bool,
) -> (Vec<CodeChunk>, bool) {
    // Same file and intersecting line ranges.
    fn overlaps(a: &CodeChunk, b: &CodeChunk) -> bool {
        a.file == b.file && ranges_overlap(a.start_line, a.end_line, b.start_line, b.end_line)
    }

    let mut merged: Vec<CodeChunk> = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        // Looking the slot up by index (rather than holding a `&mut` into
        // `merged`) lets `chunk` be moved on either path without a clone.
        let first_hit = merged.iter().position(|existing| overlaps(existing, &chunk));
        match first_hit {
            None => merged.push(chunk),
            Some(slot) => {
                // Park a placeholder in the slot so the entry can be taken by
                // value while keeping its position in the list.
                let taken = std::mem::replace(&mut merged[slot], placeholder_chunk());
                let mut combined = merge_chunks(taken, chunk);
                // Entries before `slot` overlapped neither the old entry nor
                // the incoming chunk, so only later entries can be reached by
                // the widened range.
                while let Some(offset) = merged[slot + 1..]
                    .iter()
                    .position(|other| overlaps(other, &combined))
                {
                    let other = merged.remove(slot + 1 + offset);
                    combined = merge_chunks(combined, other);
                }
                merged[slot] = combined;
            }
        }
    }

    merged.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let total = merged.len();
    let mut kept: Vec<CodeChunk> = Vec::new();
    let mut used_tokens: usize = 0;
    for chunk in merged {
        let cost = estimate_tokens(&chunk, full_content);
        // The first entry is exempt from the budget so a query never comes
        // back empty just because its best hit is large.
        if !kept.is_empty() && used_tokens + cost > max_tokens {
            continue;
        }
        used_tokens += cost;
        kept.push(chunk);
    }
    let truncated = kept.len() < total;
    (kept, truncated)
}
/// Builds an inert, empty `CodeChunk`.
///
/// `consolidate_results` parks this value in a list slot (via
/// `std::mem::replace`) while the real entry is taken out by value to be
/// merged. Every field is empty, zero, `None` or the type's default.
fn placeholder_chunk() -> CodeChunk {
    CodeChunk {
        // Identity and location.
        id: String::default(),
        file: String::default(),
        start_line: 0,
        end_line: 0,
        chunk_depth: 0,
        index_id: None,
        // Payload.
        language: None,
        content: String::default(),
        compact_snippet: None,
        function_name: None,
        // Ranking metadata.
        score: 0.0,
        match_reason: String::default(),
        chunk_type: Default::default(),
        // Relations.
        calls: vec![],
        inherits_from: vec![],
    }
}
/// Renders search hits as a Markdown document.
///
/// Layout:
/// * a `## Search Results for "<query>" (<n> result[s])` heading;
/// * when `truncated` is set, a note that results were cut to fit the token
///   budget;
/// * per chunk, a numbered `###` heading with `file:start-end`, the score
///   (two decimals) and the match reason, followed by a fenced code block
///   tagged with the chunk's language (empty tag when unknown).
///
/// With `full_content` the block holds `content`; otherwise it holds
/// `compact_snippet`, falling back to `content` when no snippet exists.
///
/// The fence is three backticks unless the body itself contains a run of
/// three or more backticks (e.g. doc-comment examples or Markdown sources);
/// then the fence is one backtick longer than the longest run, so the body
/// cannot close the block prematurely.
pub fn format_results_markdown(
    query: &str,
    chunks: &[CodeChunk],
    full_content: bool,
    truncated: bool,
) -> String {
    let mut out = String::new();
    out.push_str(&format!(
        "## Search Results for \"{}\" ({} result{})\n",
        query,
        chunks.len(),
        if chunks.len() == 1 { "" } else { "s" }
    ));
    if truncated {
        out.push_str("\n_Note: results truncated to fit token budget._\n");
    }
    for (i, c) in chunks.iter().enumerate() {
        let lang = c.language.as_deref().unwrap_or("");
        let body = if full_content {
            c.content.as_str()
        } else {
            c.compact_snippet.as_deref().unwrap_or(c.content.as_str())
        };
        // Splitting on every non-backtick character leaves only the runs of
        // consecutive backticks; the fence must be longer than the longest.
        let longest_backtick_run = body
            .split(|ch: char| ch != '`')
            .map(str::len)
            .max()
            .unwrap_or(0);
        let fence = "`".repeat(longest_backtick_run.max(2) + 1);

        out.push_str(&format!(
            "\n### {}. `{}:{}-{}` [score: {:.2}, {}]\n",
            i + 1,
            c.file,
            c.start_line,
            c.end_line,
            c.score,
            c.match_reason
        ));
        out.push_str(&fence);
        out.push_str(lang);
        out.push('\n');
        out.push_str(body);
        // Keep the closing fence on its own line.
        if !body.ends_with('\n') {
            out.push('\n');
        }
        out.push_str(&fence);
        out.push('\n');
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a chunk covering lines `start..=end` of `file` with the given
    /// score.
    ///
    /// `content` holds one `line N\n` entry per covered line; the compact
    /// snippet is the first seven of those lines joined with `\n`.
    fn make_chunk(file: &str, start: usize, end: usize, score: f32) -> CodeChunk {
        let mut content = String::new();
        for line_no in start..=end {
            content.push_str(&format!("line {line_no}\n"));
        }
        let head: Vec<&str> = content.lines().take(7).collect();
        let compact_snippet = Some(head.join("\n"));
        CodeChunk {
            id: format!("{file}:{start}:{end}"),
            file: file.to_owned(),
            language: Some("rust".into()),
            start_line: start,
            end_line: end,
            content,
            function_name: None,
            score,
            compact_snippet,
            match_reason: String::from("hybrid"),
            chunk_type: Default::default(),
            calls: Vec::new(),
            inherits_from: Vec::new(),
            chunk_depth: 0,
            index_id: None,
        }
    }

    /// Two overlapping ranges in one file collapse into a single chunk that
    /// spans both and carries the higher score.
    #[test]
    fn dedup_merges_overlapping_same_file() {
        let lower = make_chunk("a.rs", 10, 20, 0.8);
        let higher = make_chunk("a.rs", 15, 25, 0.9);
        let (kept, truncated) = consolidate_results(vec![lower, higher], 4000, false);
        assert_eq!(kept.len(), 1);
        assert!(!truncated);
        let only = &kept[0];
        assert_eq!(only.start_line, 10);
        assert_eq!(only.end_line, 25);
        assert!((only.score - 0.9).abs() < 1e-6);
    }

    /// Non-overlapping ranges and chunks from different files stay separate.
    #[test]
    fn dedup_keeps_disjoint() {
        let mut chunks = Vec::new();
        chunks.push(make_chunk("a.rs", 10, 20, 0.8));
        chunks.push(make_chunk("a.rs", 30, 40, 0.7));
        chunks.push(make_chunk("b.rs", 10, 20, 0.6));
        let (kept, _) = consolidate_results(chunks, 4000, false);
        assert_eq!(kept.len(), 3);
    }

    /// A budget too small for two chunks keeps only the best one and reports
    /// truncation.
    #[test]
    fn truncation_respects_token_budget() {
        let mut chunks = Vec::new();
        for i in 0..5 {
            let name = format!("f{i}.rs");
            chunks.push(make_chunk(&name, 1, 100, 1.0 - i as f32 * 0.1));
        }
        let (kept, truncated) = consolidate_results(chunks, 50, false);
        assert_eq!(kept.len(), 1, "tiny budget should keep only the top hit");
        assert!(truncated);
    }

    /// Nothing is flagged as truncated when every chunk fits.
    #[test]
    fn truncation_flag_false_when_all_fit() {
        let single = make_chunk("a.rs", 1, 5, 0.9);
        let (kept, truncated) = consolidate_results(vec![single], 4000, false);
        assert_eq!(kept.len(), 1);
        assert!(!truncated);
    }

    /// Output order follows score, highest first.
    #[test]
    fn results_sorted_by_score_descending() {
        let low = make_chunk("a.rs", 1, 5, 0.3);
        let high = make_chunk("b.rs", 1, 5, 0.9);
        let mid = make_chunk("c.rs", 1, 5, 0.6);
        let (kept, _) = consolidate_results(vec![low, high, mid], 4000, false);
        assert_eq!(kept[0].file, "b.rs");
        assert_eq!(kept[1].file, "c.rs");
        assert_eq!(kept[2].file, "a.rs");
    }

    /// Without `full_content` the compact snippet is rendered, not the body.
    #[test]
    fn format_markdown_uses_compact_by_default() {
        let hits = [make_chunk("src/lib.rs", 10, 30, 0.91)];
        let md = format_results_markdown("auth", &hits, false, false);
        assert!(md.contains("## Search Results for \"auth\" (1 result)"));
        assert!(md.contains("`src/lib.rs:10-30`"));
        assert!(md.contains("[score: 0.91, hybrid]"));
        assert!(!md.contains("line 30"), "should use compact snippet by default");
    }

    /// With `full_content` the whole body is rendered.
    #[test]
    fn format_markdown_full_content_when_requested() {
        let hits = [make_chunk("src/lib.rs", 10, 30, 0.91)];
        let md = format_results_markdown("auth", &hits, true, false);
        assert!(md.contains("line 30"), "full_content should emit all lines");
    }

    /// The truncation note appears when the flag is set.
    #[test]
    fn format_markdown_signals_truncation() {
        let hits = [make_chunk("a.rs", 1, 5, 0.9)];
        let md = format_results_markdown("q", &hits, false, true);
        assert!(md.contains("truncated"));
    }

    /// Even a zero budget yields the top hit, and that is not reported as
    /// truncation when it is the only result.
    #[test]
    fn always_keeps_at_least_one_result_under_tiny_budget() {
        let oversized = make_chunk("a.rs", 1, 200, 0.9);
        let (kept, truncated) = consolidate_results(vec![oversized], 0, false);
        assert_eq!(kept.len(), 1);
        assert!(!truncated);
    }
}