use std::path::PathBuf;
use anyhow::{Context, Result, anyhow, bail};
use clap::{Args, Subcommand};
use mnem_bench::bench::{AdapterKind, Bench, EmbedderChoice, RunMode};
use mnem_bench::output;
use mnem_bench::runner::{self, RunPlan};
// Top-level arguments for the `mnem bench` command group.
//
// The `after_long_help` string below holds the usage examples for this
// command; it is runtime text and must be edited with care.
//
// NOTE(review): plain `//` comments are used on purpose. clap's derive turns
// `///` doc comments on these items into help/about text, so switching to
// `///` would change what the CLI prints.
#[derive(Args, Debug)]
#[command(after_long_help = "\
Examples:
# Interactive setup wizard.
mnem bench
# Print available benches as JSON (CI-friendly).
mnem bench list
# Download a dataset to ~/.mnem/bench-data/.
mnem bench fetch longmemeval
# Non-interactive run, single bench, single adapter.
mnem bench run --benches longmemeval --with mnem --mode cpu-local \\
--out ./bench-out --top-k 10 --limit 5 --non-interactive
# Re-render RESULTS.md from a prior run directory.
mnem bench results ./bench-out
")]
pub(crate) struct BenchArgs {
// Optional subcommand. When it is `None`, `run` starts the interactive flow
// (`run_interactive`) instead of one of the scripted handlers.
#[command(subcommand)]
pub sub: Option<BenchSub>,
}
// Subcommands of `mnem bench`. Each variant carries its own argument struct
// and is dispatched by `run` to the matching `run_*` handler.
#[derive(Subcommand, Debug)]
pub(crate) enum BenchSub {
// `mnem bench list`: print the available benches as JSON (see `run_list`).
List(ListArgs),
// `mnem bench fetch [BENCH]`: fetch one bench dataset, or iterate over all
// known benches when no id is given (see `run_fetch`).
Fetch(FetchArgs),
// `mnem bench run ...`: build a `RunPlan` from flags and execute it
// (see `run_run`).
Run(RunArgs),
// `mnem bench results [DIR]`: re-render RESULTS.md from a run directory
// (see `run_results`).
Results(ResultsArgs),
}
// Arguments for `mnem bench list`.
#[derive(Args, Debug)]
pub(crate) struct ListArgs {
// `--pretty`: when set, `run_list` uses `serde_json::to_string_pretty`
// instead of the compact `serde_json::to_string`.
#[arg(long)]
pub pretty: bool,
}
// Arguments for `mnem bench fetch`.
#[derive(Args, Debug)]
pub(crate) struct FetchArgs {
// Optional positional bench id, resolved with `Bench::from_id`. When
// omitted, `run_fetch` walks every entry of `Bench::all()`.
pub bench: Option<String>,
// `--no-cache`: `run_fetch` passes the negation of this flag as the second
// argument of `mnem_bench::datasets::fetch`.
// NOTE(review): presumably this forces a fresh download instead of reusing
// a cached copy — confirm against `datasets::fetch`.
#[arg(long)]
pub no_cache: bool,
}
// Arguments for `mnem bench run`. `run_run` validates the string ids and
// copies the remaining values into a `RunPlan`.
#[derive(Args, Debug)]
pub(crate) struct RunArgs {
// `--benches a,b,c`: comma-separated bench ids (resolved via
// `Bench::from_id`). An empty list makes `run_run` select `Bench::all()`.
#[arg(long, value_delimiter = ',')]
pub benches: Vec<String>,
// `--with a,b`: comma-separated adapter ids (resolved via
// `AdapterKind::from_id`). Defaults to the single adapter "mnem".
#[arg(long = "with", value_delimiter = ',', default_values_t = vec!["mnem".to_string()])]
pub with: Vec<String>,
// `--mode`: run-mode id resolved via `RunMode::from_id`; default "cpu-local".
#[arg(long, default_value = "cpu-local")]
pub mode: String,
// `--out`: path forwarded as `RunPlan::out`; default "./bench-out".
// NOTE(review): presumably the directory run artifacts are written to — confirm.
#[arg(long, default_value = "./bench-out")]
pub out: PathBuf,
// `--top-k`: forwarded as `RunPlan::top_k`; default 10.
// NOTE(review): presumably the retrieval depth per query — confirm in the runner.
#[arg(long, default_value_t = 10)]
pub top_k: usize,
// `--embedder`: embedder id resolved via `EmbedderChoice::from_id`;
// default "onnx-minilm".
#[arg(long, default_value = "onnx-minilm")]
pub embedder: String,
// `--non-interactive`: forwarded as `RunPlan::quiet`.
#[arg(long)]
pub non_interactive: bool,
// `--no-cache`: forwarded unchanged as `RunPlan::no_cache`.
#[arg(long)]
pub no_cache: bool,
// `--limit N`: optional value forwarded as `RunPlan::limit`.
// NOTE(review): presumably caps the number of items evaluated per bench — confirm.
#[arg(long)]
pub limit: Option<usize>,
}
// Arguments for `mnem bench results`.
#[derive(Args, Debug)]
pub(crate) struct ResultsArgs {
// Positional run directory handed to `output::rerender_from_dir`;
// defaults to "./bench-out" (the same default as `mnem bench run --out`).
#[arg(default_value = "./bench-out")]
pub dir: PathBuf,
}
/// Entry point for `mnem bench`: routes parsed arguments to the matching handler.
///
/// When no subcommand was given the interactive flow is started; otherwise the
/// subcommand's argument struct is handed to its dedicated `run_*` function.
///
/// # Errors
/// Returns whatever error the selected handler produces.
pub(crate) fn run(args: BenchArgs) -> Result<()> {
    // Bare `mnem bench` has no subcommand: go straight to the interactive path.
    let sub = match args.sub {
        Some(sub) => sub,
        None => return run_interactive(),
    };

    match sub {
        BenchSub::List(list_args) => run_list(list_args),
        BenchSub::Fetch(fetch_args) => run_fetch(fetch_args),
        BenchSub::Run(run_args) => run_run(run_args),
        BenchSub::Results(results_args) => run_results(results_args),
    }
}
/// Runs the interactive flow: asks `mnem_bench::tui::run_tui` for a plan and,
/// if one comes back, executes it and prints the outcome summary.
///
/// A `None` plan is not an error; the function simply returns `Ok(())`.
///
/// # Errors
/// Propagates failures from the TUI and from `runner::run`.
fn run_interactive() -> Result<()> {
    let plan = match mnem_bench::tui::run_tui("./bench-out")? {
        Some(plan) => plan,
        // No plan was produced, so there is nothing to run.
        None => return Ok(()),
    };

    let outcomes = runner::run(&plan)?;
    print_outcome_summary(&outcomes);
    Ok(())
}
/// Prints the bench catalogue returned by `output::list_benches()` to stdout
/// as a single JSON document.
///
/// `a.pretty` selects `serde_json::to_string_pretty`; otherwise the compact
/// `serde_json::to_string` form is used.
///
/// # Errors
/// Returns the serialization error if the entries cannot be encoded.
fn run_list(a: ListArgs) -> Result<()> {
    let entries = output::list_benches();

    // Choose the encoder first, then unwrap the result in one place.
    let encoded = if a.pretty {
        serde_json::to_string_pretty(&entries)
    } else {
        serde_json::to_string(&entries)
    };
    let s = encoded?;

    println!("{s}");
    Ok(())
}
/// Fetches bench datasets, reporting progress on stderr.
///
/// With a bench id on the command line only that bench is fetched; without
/// one, every entry of `Bench::all()` is visited. In the "all" case
/// `Bench::LongMemEvalHybridV4` is skipped with a notice, because (per the
/// message printed below) it reuses the longmemeval cache.
///
/// # Errors
/// Fails on an unknown bench id, or when `mnem_bench::datasets::fetch` fails
/// (the error is annotated with the bench id).
fn run_fetch(a: FetchArgs) -> Result<()> {
    // Resolve the target list and remember whether the user named a bench.
    let (targets, explicit): (Vec<Bench>, bool) = match a.bench.as_deref() {
        Some(id) => {
            let bench = Bench::from_id(id).ok_or_else(|| anyhow!("unknown bench: {id}"))?;
            (vec![bench], true)
        }
        None => (Bench::all().to_vec(), false),
    };

    for bench in targets {
        let meta = bench.metadata();

        // Only skip the hybrid variant when it was selected implicitly.
        let implicit_duplicate = !explicit && matches!(bench, Bench::LongMemEvalHybridV4);
        if implicit_duplicate {
            eprintln!(
                "[mnem bench] {} reuses the longmemeval cache; skipping duplicate fetch.",
                meta.id
            );
            continue;
        }

        eprintln!("[mnem bench] fetching {}...", meta.id);
        let fetched = mnem_bench::datasets::fetch(bench, !a.no_cache, |d, t| {
            // Progress is only drawn when the second callback argument is non-zero.
            if t > 0 {
                eprint!("\r {d}/{t} bytes");
            }
        });
        let path = fetched.with_context(|| format!("fetching {}", meta.id))?;
        eprintln!("\n cached at {}", path.display());
    }

    Ok(())
}
/// Handles `mnem bench run`: validates the string ids from the command line,
/// assembles a `RunPlan`, executes it and prints the outcome summary.
///
/// An empty `--benches` list selects every bench from `Bench::all()`.
///
/// # Errors
/// Fails on an unknown bench, adapter, mode or embedder id (checked in that
/// order), when the resulting bench or adapter selection is empty, or when
/// `runner::run` itself fails.
fn run_run(a: RunArgs) -> Result<()> {
    let benches: Vec<Bench> = if a.benches.is_empty() {
        Bench::all().to_vec()
    } else {
        // Stops at the first id that does not resolve.
        a.benches
            .iter()
            .map(|id| Bench::from_id(id).ok_or_else(|| anyhow!("unknown bench: {id}")))
            .collect::<Result<_>>()?
    };

    let adapters = a
        .with
        .iter()
        .map(|id| AdapterKind::from_id(id).ok_or_else(|| anyhow!("unknown adapter: {id}")))
        .collect::<Result<Vec<_>>>()?;

    let mode = match RunMode::from_id(&a.mode) {
        Some(mode) => mode,
        None => return Err(anyhow!("unknown mode: {}", a.mode)),
    };
    let embedder = match EmbedderChoice::from_id(&a.embedder) {
        Some(embedder) => embedder,
        None => return Err(anyhow!("unknown embedder: {}", a.embedder)),
    };

    // Emptiness is checked only after every id has been validated.
    if benches.is_empty() {
        bail!("no benches selected");
    }
    if adapters.is_empty() {
        bail!("no adapters selected (pass --with mnem)");
    }

    let plan = RunPlan {
        benches,
        adapters,
        mode,
        embedder,
        out: a.out,
        top_k: a.top_k,
        limit: a.limit,
        no_cache: a.no_cache,
        // `--non-interactive` is what drives the plan's `quiet` flag.
        quiet: a.non_interactive,
    };

    let outcomes = runner::run(&plan)?;
    print_outcome_summary(&outcomes);
    Ok(())
}
/// Handles `mnem bench results`: asks `output::rerender_from_dir` to rebuild
/// the report for `a.dir`, then prints the path of the rendered RESULTS.md.
///
/// # Errors
/// Returns the re-render error, annotated with the directory that was used.
fn run_results(a: ResultsArgs) -> Result<()> {
    let dir = &a.dir;
    let shown = dir.display();

    output::rerender_from_dir(dir)
        .with_context(|| format!("rerendering RESULTS.md from {}", shown))?;

    println!("rendered {}/RESULTS.md", shown);
    Ok(())
}
/// Prints a one-line-per-outcome summary to stdout, under a banner.
///
/// For an outcome with a report the headline metric is chosen from the
/// report's `overall` map: `recall@5` (paired with `recall@10`) if present,
/// otherwise `avg_recall`, otherwise a "(no headline metric)" placeholder.
/// Outcomes without a report are listed as SKIPPED with their reason.
fn print_outcome_summary(outcomes: &[mnem_bench::BenchOutcome]) {
    println!();
    println!("=== mnem-bench summary ===");

    for o in outcomes {
        let bench_meta = o.bench.metadata();
        let adapter_id = o.adapter.id();

        // No report means the bench/adapter pair was skipped.
        let r = match &o.report {
            Some(r) => r,
            None => {
                println!(
                    " {} ({}): SKIPPED ({})",
                    bench_meta.id, adapter_id, o.skipped_reason,
                );
                continue;
            }
        };

        let metric_line = r
            .overall
            .get("recall@5")
            .map(|v| {
                // NOTE(review): a missing `recall@10` is shown as 0.0000,
                // which is indistinguishable from a genuine zero score.
                let r10 = r.overall.get("recall@10").copied().unwrap_or(0.0);
                format!("R@5={v:.4} R@10={r10:.4}")
            })
            .or_else(|| r.overall.get("avg_recall").map(|v| format!("avg_recall={v:.4}")))
            .unwrap_or_else(|| "(no headline metric)".to_string());

        println!(
            " {} ({}): n={} {metric_line} runtime={:.1}s",
            bench_meta.id, adapter_id, r.n_questions, r.runtime_seconds,
        );
    }
}