#![allow(
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::too_many_lines
)]
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;
use ripvec_core::chunk::CodeChunk;
use ripvec_core::embed::{Scope, SearchConfig};
use ripvec_core::encoder::ripvec::dense::StaticEncoder;
use ripvec_core::encoder::ripvec::index::RipvecIndex;
use ripvec_core::encoder::ripvec::ranking::is_symbol_query;
use ripvec_core::hybrid::{SearchMode, pagerank_lookup};
use ripvec_core::profile::Profiler;
use ripvec_core::ranking::{CrossEncoderRerank, RankingLayer, apply_chain};
use ripvec_core::repo_map::build_graph;
use ripvec_core::rerank::{DEFAULT_RERANK_CANDIDATES, DEFAULT_RERANK_MODEL, Reranker};
use serde::{Deserialize, Serialize};
/// Number of results requested from every search and the cutoff applied when
/// counting tokens and locating target hits.
const TOP_K: usize = 10;
/// How many times each query is executed and timed; the median of these runs
/// is the query's reported latency.
const LATENCY_RUNS: usize = 5;
/// Alpha handed to the PageRank boost layer and to index construction when
/// PageRank is in use.
const PAGERANK_ALPHA: f32 = 0.5;
/// Encoder model selected by `--mode matched`.
const MATCHED_MODEL: &str = "minishlab/potion-code-16M";
/// Encoder model selected by `--mode default`.
const DEFAULT_MODEL: &str = "minishlab/potion-base-32M";
/// One benchmark repository entry, deserialized from the `--repos-json` file.
#[derive(Debug, Deserialize)]
struct RepoSpec {
/// Repository name. Also used as the checkout directory name under the bench
/// root and as the stem of the annotations file (`<name>.json`).
name: String,
/// Language label used by the `--language` filter and for grouping results.
language: String,
/// Deserialized but not read by this program.
#[allow(dead_code)]
url: String,
/// Deserialized but not read by this program.
#[allow(dead_code)]
revision: String,
/// Optional sub-directory of the checkout to index instead of the checkout root.
#[serde(default)]
benchmark_root: Option<String>,
}
/// A relevance target exactly as it appears in an annotations file.
///
/// The enum is untagged, so a target may be written either as a bare string
/// (a path) or as an object carrying a `path` plus optional line bounds.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum RawTarget {
/// Target written as a bare path string, with no line bounds.
Path(String),
/// Target written as an object; either line bound may be omitted.
Span {
path: String,
#[serde(default)]
start_line: Option<usize>,
#[serde(default)]
end_line: Option<usize>,
},
}
/// Normalized relevance target produced from a [`RawTarget`].
#[derive(Debug, Clone)]
struct Target {
/// Path of the file that counts as relevant.
path: String,
/// First line of the relevant span, if the annotation gave one.
start_line: Option<usize>,
/// Last line of the relevant span, if the annotation gave one.
end_line: Option<usize>,
}
impl From<RawTarget> for Target {
    /// Flattens either annotation shape into a [`Target`]; a bare path gets no
    /// line bounds.
    fn from(raw: RawTarget) -> Self {
        let (path, start_line, end_line) = match raw {
            RawTarget::Path(path) => (path, None, None),
            RawTarget::Span {
                path,
                start_line,
                end_line,
            } => (path, start_line, end_line),
        };
        Self {
            path,
            start_line,
            end_line,
        }
    }
}
/// One annotated query exactly as stored in a repository's annotations file.
#[derive(Debug, Deserialize)]
struct RawTask {
/// Query text passed to the search index.
query: String,
/// Primary relevance targets; defaults to empty when absent.
#[serde(default)]
relevant: Vec<RawTarget>,
/// Additional relevance targets; defaults to empty when absent.
#[serde(default)]
secondary: Vec<RawTarget>,
/// Explicit category label; when absent one is inferred from the query text.
#[serde(default)]
category: Option<String>,
}
/// A query prepared for evaluation.
struct Task {
/// Query text passed to the search index.
query: String,
/// Every relevance target for the query: the `relevant` entries followed by
/// the `secondary` entries.
targets: Vec<Target>,
/// Category label, either taken from the annotation or inferred from the query.
category: String,
}
/// Guesses a category label for a query that has no explicit one.
///
/// After trimming, a query without any space is `"symbol"`; a query that
/// starts with `"how "` (case-insensitively) is `"architecture"`; anything
/// else is `"semantic"`.
fn infer_category(query: &str) -> &'static str {
    let trimmed = query.trim();
    match trimmed.find(' ') {
        None => "symbol",
        Some(_) if trimmed.to_lowercase().starts_with("how ") => "architecture",
        Some(_) => "semantic",
    }
}
/// Reports whether two paths refer to the same file for scoring purposes.
///
/// Backslashes are normalized to `/` first. The paths match when they are
/// equal, or when either one ends with `/` followed by the other (i.e. one is
/// a trailing run of whole components of the other).
fn path_matches(file_path: &str, target_path: &str) -> bool {
    let file = file_path.replace('\\', "/");
    let target = target_path.replace('\\', "/");
    if file == target {
        return true;
    }
    // `long` ends with "/<short>" exactly when stripping `short` leaves a
    // prefix that itself ends in '/'.
    let ends_with_component = |long: &str, short: &str| {
        long.strip_suffix(short)
            .is_some_and(|head| head.ends_with('/'))
    };
    ends_with_component(&file, &target) || ends_with_component(&target, &file)
}
/// Reports whether `chunk` counts as a hit for `target`.
///
/// The chunk's file must match the target path. When the target carries both
/// a start and an end line, the chunk's line range must also overlap that
/// span (inclusive on both ends); otherwise any chunk of the file matches.
fn target_matches_chunk(chunk: &CodeChunk, target: &Target) -> bool {
    let same_file = path_matches(&chunk.file_path, &target.path);
    let lines_overlap = match (target.start_line, target.end_line) {
        (Some(first), Some(last)) => chunk.end_line >= first && chunk.start_line <= last,
        _ => true,
    };
    same_file && lines_overlap
}
/// Discounted cumulative gain of a relevance list.
///
/// The gain at zero-based position `i` is divided by `log2(i + 2)`, so the
/// first position is undiscounted.
fn dcg(rels: &[u8]) -> f64 {
    let discounted_gain = |(position, gain): (usize, &u8)| -> f64 {
        let discount = ((position + 2) as f64).log2();
        f64::from(*gain) / discount
    };
    rels.iter().enumerate().map(discounted_gain).sum()
}
/// Binary-relevance NDCG@k.
///
/// `ranks` holds the 1-based positions at which relevant items were found;
/// positions outside `1..=k` are ignored. The ideal ordering places
/// `min(k, n_relevant)` relevant items at the top. Returns `0.0` when
/// `n_relevant` is zero or the ideal DCG is not positive.
fn ndcg_at_k(ranks: &[usize], n_relevant: usize, k: usize) -> f64 {
    if n_relevant == 0 {
        return 0.0;
    }
    let mut hits = vec![0u8; k];
    for &rank in ranks {
        // Rank 0 fails `checked_sub`; ranks beyond `k` fail `get_mut`.
        if let Some(slot) = rank.checked_sub(1).and_then(|idx| hits.get_mut(idx)) {
            *slot = 1;
        }
    }
    let ideal_hits = vec![1u8; n_relevant.min(k)];
    let ideal = dcg(&ideal_hits);
    if ideal > 0.0 {
        dcg(&hits) / ideal
    } else {
        0.0
    }
}
/// Linearly interpolated percentile of an ascending-sorted slice.
///
/// `p` is a percentage and is clamped to `[0, 100]`, so out-of-range requests
/// return the first or last element instead of indexing past the slice.
/// Returns `0.0` for an empty slice. The caller is responsible for passing
/// data that is already sorted.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    let Some(last) = sorted.len().checked_sub(1) else {
        return 0.0;
    };
    // Fractional index into the slice; at most `last` thanks to the clamp.
    let pos = (p.clamp(0.0, 100.0) / 100.0) * (last as f64);
    let lo = pos.floor() as usize;
    let hi = (pos.ceil() as usize).min(last);
    if lo == hi {
        return sorted[lo];
    }
    let frac = pos - lo as f64;
    sorted[lo] * (1.0 - frac) + sorted[hi] * frac
}
/// Benchmark outcome for a single repository, serialized into the report.
#[derive(Debug, Serialize)]
struct RepoResult {
/// Repository name, copied from its spec.
repo: String,
/// Language label, copied from its spec.
language: String,
/// Number of chunks in the built index.
chunks: usize,
/// Mean per-query token estimate, where a query's estimate is the sum over its
/// top results of content length divided by four.
tokens: usize,
/// Wall-clock time spent building the index, in milliseconds.
index_ms: f64,
/// Mean NDCG@5 over the repository's tasks.
ndcg5: f64,
/// Mean NDCG@10 over the repository's tasks.
ndcg10: f64,
/// 50th percentile of the per-query median latencies, in milliseconds.
p50_ms: f64,
/// 90th percentile of the per-query median latencies, in milliseconds.
p90_ms: f64,
/// 95th percentile of the per-query median latencies, in milliseconds.
p95_ms: f64,
/// 99th percentile of the per-query median latencies, in milliseconds.
p99_ms: f64,
/// Mean NDCG@10 per task category.
by_category: BTreeMap<String, f64>,
}
/// Complete benchmark report, emitted as pretty-printed JSON.
///
/// Every `avg_*` field is a macro average: the unweighted mean, across
/// languages, of that language's per-repository mean.
#[derive(Debug, Serialize)]
struct FullReport {
/// Value of `--mode` for this run.
mode: String,
/// Encoder model identifier used for this run.
model: String,
/// Number of repositories that produced a result.
n_repos: usize,
avg_ndcg10: f64,
avg_p50_ms: f64,
avg_p90_ms: f64,
avg_p95_ms: f64,
avg_p99_ms: f64,
avg_index_ms: f64,
avg_tokens: f64,
/// Per-language metric table keyed by language, then by metric name
/// (`repos`, `ndcg5`, `ndcg10`, `tokens`, `p50_ms`, `p90_ms`, `p95_ms`,
/// `p99_ms`, `index_ms`).
by_language: BTreeMap<String, BTreeMap<String, f64>>,
/// Per-category NDCG@10: the mean, over repositories, of each repository's
/// category mean.
by_category: BTreeMap<String, f64>,
/// Individual repository results, in the order they were run.
repos: Vec<RepoResult>,
}
/// Runs every task against `index` and aggregates quality and latency.
///
/// Each query is executed `LATENCY_RUNS` times. The timed region covers the
/// hybrid search plus whichever re-ranking layers are enabled:
///
/// * the PageRank boost, when `use_pagerank` is set and a lookup is supplied;
/// * the cross-encoder rerank, when `use_rerank_gate` is set, a reranker is
///   supplied, the query is not a symbol query, and the index's corpus class
///   is `Docs` or `Mixed`.
///
/// Scoring uses the ranking from the final run. For every target, the rank of
/// the first top-`TOP_K` chunk that matches it is recorded, and NDCG is
/// computed against the task's total target count.
///
/// Returns, in order: mean NDCG@5, mean NDCG@10, the per-task median latency
/// in milliseconds, the per-task NDCG@10 values grouped by category, and the
/// mean per-task token estimate (content length / 4 summed over the top
/// results). An empty `tasks` slice yields zeros and empty collections rather
/// than NaN means.
fn evaluate(
    index: &RipvecIndex,
    tasks: &[Task],
    use_pagerank: bool,
    pagerank_lookup_arc: Option<&Arc<std::collections::HashMap<String, f32>>>,
    reranker: Option<&Arc<Reranker>>,
    use_rerank_gate: bool,
) -> (f64, f64, Vec<f64>, BTreeMap<String, Vec<f64>>, usize) {
    // Without this guard the means below would be 0.0 / 0.0 = NaN.
    if tasks.is_empty() {
        return (0.0, 0.0, Vec::new(), BTreeMap::new(), 0);
    }

    let mut ndcg5_sum = 0.0_f64;
    let mut ndcg10_sum = 0.0_f64;
    let mut medians: Vec<f64> = Vec::with_capacity(tasks.len());
    let mut category_ndcg10: BTreeMap<String, Vec<f64>> = BTreeMap::new();
    let mut total_tokens: usize = 0;

    for task in tasks {
        let mut latencies: Vec<f64> = Vec::with_capacity(LATENCY_RUNS);
        let mut ranked: Vec<(usize, f32)> = Vec::new();

        for _ in 0..LATENCY_RUNS {
            // Layer construction stays inside the timed region, as before, so
            // reported latencies remain comparable with earlier runs.
            let started = Instant::now();
            ranked = index.search(&task.query, TOP_K, SearchMode::Hybrid, None, None, None);

            if use_pagerank && let Some(lookup) = pagerank_lookup_arc {
                let layers: Vec<Box<dyn RankingLayer>> = vec![Box::new(
                    ripvec_core::ranking::PageRankBoost::new(Arc::clone(lookup), PAGERANK_ALPHA),
                )];
                apply_chain(&mut ranked, index.chunks(), &layers);
            }

            if use_rerank_gate
                && let Some(rk) = reranker
                && !is_symbol_query(&task.query)
            {
                let scope_says_docs = matches!(
                    index.corpus_class(),
                    ripvec_core::encoder::ripvec::index::CorpusClass::Docs
                        | ripvec_core::encoder::ripvec::index::CorpusClass::Mixed
                );
                if scope_says_docs {
                    let layer = CrossEncoderRerank::new(
                        Arc::clone(rk),
                        task.query.clone(),
                        DEFAULT_RERANK_CANDIDATES,
                    );
                    let layers: Vec<Box<dyn RankingLayer>> = vec![Box::new(layer)];
                    apply_chain(&mut ranked, index.chunks(), &layers);
                }
            }

            latencies.push(started.elapsed().as_secs_f64() * 1000.0);
        }

        // Elapsed times are never NaN; `total_cmp` just removes the panic path.
        latencies.sort_by(f64::total_cmp);
        medians.push(latencies[latencies.len() / 2]);

        total_tokens += ranked
            .iter()
            .take(TOP_K)
            .filter_map(|&(i, _)| index.chunks().get(i))
            .map(|c| c.content.len() / 4)
            .sum::<usize>();

        // 1-based rank of the first matching chunk for each target that was hit.
        let ranks: Vec<usize> = task
            .targets
            .iter()
            .filter_map(|target| {
                ranked
                    .iter()
                    .take(TOP_K)
                    .position(|&(chunk_idx, _)| {
                        index
                            .chunks()
                            .get(chunk_idx)
                            .is_some_and(|chunk| target_matches_chunk(chunk, target))
                    })
                    .map(|pos| pos + 1)
            })
            .collect();

        let n_relevant = task.targets.len();
        let q_ndcg5 = ndcg_at_k(&ranks, n_relevant, 5);
        let q_ndcg10 = ndcg_at_k(&ranks, n_relevant, 10);
        ndcg5_sum += q_ndcg5;
        ndcg10_sum += q_ndcg10;
        category_ndcg10
            .entry(task.category.clone())
            .or_default()
            .push(q_ndcg10);
    }

    let n_tasks = tasks.len() as f64;
    (
        ndcg5_sum / n_tasks,
        ndcg10_sum / n_tasks,
        medians,
        category_ndcg10,
        total_tokens / tasks.len(),
    )
}
/// Benchmarks one repository: loads its annotations, builds the index, runs
/// the evaluation, and prints a one-line summary to stderr.
///
/// Returns `Ok(None)` (after printing a `skip` line) when the checkout
/// directory or the annotations file is missing, or when the annotations
/// contain no tasks.
///
/// When `use_pagerank` is set, a repository graph is built to derive the
/// PageRank lookup. If that fails, a warning is printed and the run continues
/// without PageRank instead of silently changing what is being measured.
///
/// # Errors
/// Fails when the annotations cannot be read or parsed, when the encoder
/// cannot be loaded, or when index construction fails.
fn run_repo(
    spec: &RepoSpec,
    bench_root: &Path,
    annotations_dir: &Path,
    model_repo: &str,
    reranker: Option<&Arc<Reranker>>,
    use_pagerank: bool,
    use_rerank_gate: bool,
) -> anyhow::Result<Option<RepoResult>> {
    let checkout = bench_root.join(&spec.name);
    let repo_root = match &spec.benchmark_root {
        Some(sub) => checkout.join(sub),
        None => checkout,
    };
    if !repo_root.exists() {
        eprintln!(
            " skip {} (missing checkout at {})",
            spec.name,
            repo_root.display()
        );
        return Ok(None);
    }

    let ann_path = annotations_dir.join(format!("{}.json", spec.name));
    if !ann_path.exists() {
        eprintln!(
            " skip {} (missing annotations at {})",
            spec.name,
            ann_path.display()
        );
        return Ok(None);
    }

    let raw: Vec<RawTask> = serde_json::from_slice(&std::fs::read(&ann_path)?)?;
    let tasks: Vec<Task> = raw
        .into_iter()
        .map(|t| Task {
            // Evaluated before `t.query` is moved into the struct below.
            category: t
                .category
                .unwrap_or_else(|| infer_category(&t.query).to_string()),
            query: t.query,
            targets: t
                .relevant
                .into_iter()
                .chain(t.secondary)
                .map(Into::into)
                .collect(),
        })
        .collect();
    if tasks.is_empty() {
        eprintln!(" skip {} (no tasks)", spec.name);
        return Ok(None);
    }

    let cfg = SearchConfig {
        scope: Scope::All,
        ..SearchConfig::default()
    };
    let profiler = Profiler::noop();

    let pr_lookup: Option<std::collections::HashMap<String, f32>> = if use_pagerank {
        match build_graph(&repo_root) {
            Ok(graph) => Some(pagerank_lookup(&graph)),
            Err(e) => {
                eprintln!(
                    " warn {} (repo graph build failed, continuing without PageRank: {e:?})",
                    spec.name
                );
                None
            }
        }
    } else {
        None
    };
    // `pr_lookup` can only be `Some` when PageRank was requested.
    let pr_alpha = if pr_lookup.is_some() {
        PAGERANK_ALPHA
    } else {
        0.0
    };
    // The index takes the lookup by value, so the evaluator gets its own copy.
    let pr_lookup_arc = pr_lookup.clone().map(Arc::new);

    // The encoder is loaded before the clock starts; only index construction is timed.
    let encoder = StaticEncoder::from_pretrained(model_repo)?;
    let started = Instant::now();
    let index = RipvecIndex::from_root(&repo_root, encoder, &cfg, &profiler, pr_lookup, pr_alpha)?;
    let index_ms = started.elapsed().as_secs_f64() * 1000.0;

    let (ndcg5, ndcg10, mut latencies, by_category, avg_tokens) = evaluate(
        &index,
        &tasks,
        use_pagerank,
        pr_lookup_arc.as_ref(),
        reranker,
        use_rerank_gate,
    );

    // The per-query medians are not needed in their original order afterwards.
    latencies.sort_by(f64::total_cmp);
    let p50 = percentile(&latencies, 50.0);
    let p90 = percentile(&latencies, 90.0);
    let p95 = percentile(&latencies, 95.0);
    let p99 = percentile(&latencies, 99.0);

    // Every category entry holds at least one score, so the division is safe.
    let by_category_avg: BTreeMap<String, f64> = by_category
        .into_iter()
        .map(|(k, v)| (k, v.iter().sum::<f64>() / v.len() as f64))
        .collect();

    let chunks = index.chunks().len();
    eprintln!(
        "{:<26} {:<11} {:>6}c {:>7}tok {:>6.0}ms-idx ndcg5={:.3} ndcg10={:.3} p50={:.2}ms p90={:.2}ms",
        spec.name, spec.language, chunks, avg_tokens, index_ms, ndcg5, ndcg10, p50, p90
    );

    Ok(Some(RepoResult {
        repo: spec.name.clone(),
        language: spec.language.clone(),
        chunks,
        tokens: avg_tokens,
        index_ms,
        ndcg5,
        ndcg10,
        p50_ms: p50,
        p90_ms: p90,
        p95_ms: p95,
        p99_ms: p99,
        by_category: by_category_avg,
    }))
}
/// Removes the flag at `args[i]` together with the value that follows it and
/// returns that value.
///
/// # Errors
/// Fails when the flag is the last argument and so has no value.
fn take_flag_value(args: &mut Vec<String>, i: usize) -> anyhow::Result<String> {
    let flag = args.remove(i);
    if i < args.len() {
        Ok(args.remove(i))
    } else {
        anyhow::bail!("missing value for {flag}")
    }
}

/// Entry point: parses the command line, benchmarks every selected repository,
/// prints a per-language table to stderr, and emits the JSON report.
///
/// Recognized flags (each takes one value): `--mode` (`matched` or `default`),
/// `--repos-json`, `--bench-root`, `--annotations-dir`, `--out`, and the
/// repeatable filters `--language` and `--repo`. Path defaults are resolved
/// under `$HOME`. Unrecognized arguments are ignored.
/// NOTE(review): presumably so harness-supplied flags pass through — confirm.
///
/// The report is written to `--out` when given, otherwise printed to stdout.
///
/// # Errors
/// Fails when a flag is missing its value, the mode is unknown, the reranker
/// or repo list cannot be loaded, no repository produced a result, or the
/// report cannot be serialized or written. A failure inside a single
/// repository is reported on stderr and does not abort the run.
fn main() -> anyhow::Result<()> {
    let mut args: Vec<String> = std::env::args().skip(1).collect();

    let home = PathBuf::from(std::env::var_os("HOME").unwrap_or_default());
    let mut mode = "matched".to_string();
    let mut repos_json = home.join("src/semble/benchmarks/repos.json");
    let mut bench_root = home.join(".cache/semble-bench");
    let mut annotations_dir = home.join("src/semble/benchmarks/annotations");
    let mut out_path: Option<PathBuf> = None;
    let mut language_filter: Vec<String> = Vec::new();
    let mut name_filter: Vec<String> = Vec::new();

    // Recognized flags are removed in place, so `i` only advances past
    // arguments that are not ours.
    let mut i = 0;
    while i < args.len() {
        match args[i].as_str() {
            "--mode" => mode = take_flag_value(&mut args, i)?,
            "--repos-json" => repos_json = take_flag_value(&mut args, i)?.into(),
            "--bench-root" => bench_root = take_flag_value(&mut args, i)?.into(),
            "--annotations-dir" => annotations_dir = take_flag_value(&mut args, i)?.into(),
            "--out" => out_path = Some(take_flag_value(&mut args, i)?.into()),
            "--language" => language_filter.push(take_flag_value(&mut args, i)?),
            "--repo" => name_filter.push(take_flag_value(&mut args, i)?),
            _ => i += 1,
        }
    }

    let (model_repo, use_pagerank, use_rerank) = match mode.as_str() {
        "matched" => (MATCHED_MODEL, false, false),
        "default" => (DEFAULT_MODEL, true, true),
        other => anyhow::bail!("unknown --mode {other}: expected 'matched' or 'default'"),
    };
    eprintln!("model: {model_repo}");

    let reranker: Option<Arc<Reranker>> = if use_rerank {
        eprintln!("loading reranker ({DEFAULT_RERANK_MODEL})...");
        Some(Arc::new(Reranker::from_pretrained(DEFAULT_RERANK_MODEL)?))
    } else {
        None
    };

    let specs: Vec<RepoSpec> = serde_json::from_slice(&std::fs::read(&repos_json)?)?;
    // An empty filter list means "no restriction".
    let filtered: Vec<&RepoSpec> = specs
        .iter()
        .filter(|s| language_filter.is_empty() || language_filter.contains(&s.language))
        .filter(|s| name_filter.is_empty() || name_filter.contains(&s.name))
        .collect();

    eprintln!(
        "running ripvec semble-equivalent bench (mode={}) over {} repos",
        mode,
        filtered.len()
    );
    eprintln!();
    eprintln!(
        "{:<26} {:<11} {:>7} {:>10} {:>10} {:>15} {:>9} {:>9}",
        "Repo", "Language", "Chunks", "Tokens", "Index", "NDCG (5 / 10)", "p50", "p90"
    );

    let mut results: Vec<RepoResult> = Vec::new();
    for spec in filtered {
        match run_repo(
            spec,
            &bench_root,
            &annotations_dir,
            model_repo,
            reranker.as_ref(),
            use_pagerank,
            use_rerank,
        ) {
            Ok(Some(r)) => results.push(r),
            Ok(None) => {}
            Err(e) => eprintln!(" fail {}: {e}", spec.name),
        }
    }
    if results.is_empty() {
        anyhow::bail!("no results: check --bench-root and that repos are cloned + annotated");
    }

    let mut by_language: BTreeMap<String, Vec<&RepoResult>> = BTreeMap::new();
    for r in &results {
        by_language.entry(r.language.clone()).or_default().push(r);
    }

    // Metric name -> accessor; each is averaged over a language's repositories.
    let metrics: [(&str, fn(&RepoResult) -> f64); 8] = [
        ("ndcg5", |r: &RepoResult| r.ndcg5),
        ("ndcg10", |r: &RepoResult| r.ndcg10),
        ("tokens", |r: &RepoResult| r.tokens as f64),
        ("p50_ms", |r: &RepoResult| r.p50_ms),
        ("p90_ms", |r: &RepoResult| r.p90_ms),
        ("p95_ms", |r: &RepoResult| r.p95_ms),
        ("p99_ms", |r: &RepoResult| r.p99_ms),
        ("index_ms", |r: &RepoResult| r.index_ms),
    ];
    let lang_summary: BTreeMap<String, BTreeMap<String, f64>> = by_language
        .iter()
        .map(|(lang, group)| {
            // Groups are created on first push, so `n` is never zero.
            let n = group.len() as f64;
            let mut m = BTreeMap::new();
            m.insert("repos".to_string(), n);
            for &(key, metric) in &metrics {
                let mean = group.iter().map(|r| metric(*r)).sum::<f64>() / n;
                m.insert(key.to_string(), mean);
            }
            (lang.clone(), m)
        })
        .collect();

    let mut cat_acc: BTreeMap<String, Vec<f64>> = BTreeMap::new();
    for r in &results {
        for (cat, val) in &r.by_category {
            cat_acc.entry(cat.clone()).or_default().push(*val);
        }
    }
    let by_category_avg: BTreeMap<String, f64> = cat_acc
        .into_iter()
        .map(|(k, v)| (k, v.iter().sum::<f64>() / v.len() as f64))
        .collect();

    // Macro average: unweighted mean of the per-language means.
    let lang_macro = |key: &str| -> f64 {
        let vals: Vec<f64> = lang_summary
            .values()
            .filter_map(|m| m.get(key).copied())
            .collect();
        if vals.is_empty() {
            0.0
        } else {
            vals.iter().sum::<f64>() / vals.len() as f64
        }
    };

    let report = FullReport {
        mode: mode.clone(),
        model: model_repo.to_string(),
        n_repos: results.len(),
        avg_ndcg10: lang_macro("ndcg10"),
        avg_p50_ms: lang_macro("p50_ms"),
        avg_p90_ms: lang_macro("p90_ms"),
        avg_p95_ms: lang_macro("p95_ms"),
        avg_p99_ms: lang_macro("p99_ms"),
        avg_index_ms: lang_macro("index_ms"),
        avg_tokens: lang_macro("tokens"),
        // Cloned because the table printing below still reads `lang_summary`.
        by_language: lang_summary.clone(),
        by_category: by_category_avg,
        repos: results,
    };

    eprintln!();
    eprintln!("{}", "=".repeat(104));
    eprintln!("ripvec hybrid benchmark by language (mode={mode})");
    eprintln!("{}", "=".repeat(104));
    eprintln!();

    let langs: Vec<&String> = lang_summary.keys().collect();
    // Column headers: "Avg" followed by each language with its first letter upper-cased.
    let header_cols: Vec<String> = std::iter::once("Avg".to_string())
        .chain(langs.iter().map(|l| {
            let mut c = l.chars();
            match c.next() {
                Some(f) => f.to_uppercase().collect::<String>() + c.as_str(),
                None => String::new(),
            }
        }))
        .collect();
    eprintln!(
        " {:<28} {}",
        "Metric",
        header_cols
            .iter()
            .map(|h| format!("{h:>9}"))
            .collect::<Vec<_>>()
            .join(" ")
    );
    eprintln!(
        " {:<28} {}",
        "-".repeat(28),
        header_cols
            .iter()
            .map(|_| "-".repeat(9))
            .collect::<Vec<_>>()
            .join(" ")
    );

    // Unit-less metrics get three decimals; metrics with a unit get two plus the suffix.
    let cell = |v: f64, suffix: &str| -> String {
        if suffix.is_empty() {
            format!("{v:>9.3}")
        } else {
            format!("{v:>8.2}{suffix}")
        }
    };
    let row = |label: &str, key: &str, suffix: &str| {
        let cells: Vec<String> = std::iter::once(lang_macro(key))
            .chain(langs.iter().map(|lang| lang_summary[*lang][key]))
            .map(|v| cell(v, suffix))
            .collect();
        eprintln!(" {label:<28} {}", cells.join(" "));
    };
    row("NDCG@10", "ndcg10", "");
    row("tokens", "tokens", "");
    row("q-p50", "p50_ms", "ms");
    row("q-p90", "p90_ms", "ms");
    row("q-p95", "p95_ms", "ms");
    row("q-p99", "p99_ms", "ms");
    row("index", "index_ms", "ms");

    if !report.by_category.is_empty() {
        eprintln!();
        eprintln!("By category (NDCG@10, mean over all repos)");
        for (cat, val) in &report.by_category {
            eprintln!(" {cat:<16} {val:.3}");
        }
    }

    let json = serde_json::to_string_pretty(&report)?;
    if let Some(out) = out_path {
        std::fs::write(&out, &json)?;
        eprintln!();
        eprintln!("wrote results to {}", out.display());
    } else {
        println!("{json}");
    }
    Ok(())
}