1use std::fs;
8use std::path::Path;
9
10use anyhow::{Context, Result};
11use serde::{Deserialize, Serialize};
12
13use crate::bench::Bench;
14use crate::runner::BenchOutcome;
15use crate::score::ScoreReport;
16
17pub fn write_results_md(out: &Path, outcomes: &[BenchOutcome]) -> Result<()> {
19 let mut s = String::new();
20 s.push_str("# mnem-bench results\n\n");
21 s.push_str("| Bench | Adapter | n | metric | value | runtime (s) |\n");
22 s.push_str("|-------|---------|---|--------|------:|-----------:|\n");
23 for o in outcomes {
24 let bench_id = o.bench.metadata().id;
25 let adapter_id = o.adapter.id();
26 match &o.report {
27 Some(r) => {
28 let (metric, value) = if let Some(v) = r.overall.get("recall@5") {
31 ("recall@5", *v)
32 } else if let Some(v) = r.overall.get("avg_recall") {
33 ("avg_recall", *v)
34 } else {
35 ("--", 0.0)
36 };
37 s.push_str(&format!(
38 "| {bench_id} | {adapter_id} | {} | {metric} | {value:.4} | {:.1} |\n",
39 r.n_questions, r.runtime_seconds,
40 ));
41 }
42 None => {
43 s.push_str(&format!(
44 "| {bench_id} | {adapter_id} | -- | -- | -- | skipped: {} |\n",
45 o.skipped_reason,
46 ));
47 }
48 }
49 }
50 s.push('\n');
51 s.push_str("## Notes\n\n");
52 s.push_str("- Benches: LongMemEval, LoCoMo, ConvoMem, MemBench (simple-roles + ");
53 s.push_str(
54 "highlevel-movie), LongMemEval-hybrid-v4. All run against the in-process mnem adapter.\n",
55 );
56 s.push_str("- Default embedder: ONNX MiniLM-L6-v2 (bundled, in-process).\n");
57 s.push_str(" Pass `--embedder bag-of-tokens` for offline / CI runs that\n");
58 s.push_str(" skip the ONNX model load (toy embedder; recall is not\n");
59 s.push_str(" comparable to headline ONNX figures).\n");
60 let p = out.join("RESULTS.md");
61 fs::write(&p, s).with_context(|| format!("writing {}", p.display()))?;
62 Ok(())
63}
64
65pub fn rerender_from_dir(dir: &Path) -> Result<()> {
68 let mut outcomes = Vec::new();
69 for bench in Bench::all() {
70 let id = bench.metadata().id;
71 let p = dir.join(format!("{id}.json"));
72 if !p.is_file() {
73 continue;
74 }
75 let bytes = fs::read(&p).with_context(|| format!("reading {}", p.display()))?;
76 let report: ScoreReport =
77 serde_json::from_slice(&bytes).with_context(|| format!("parsing {}", p.display()))?;
78 let adapter = crate::bench::AdapterKind::from_id(&report.adapter)
80 .unwrap_or(crate::bench::AdapterKind::Mnem);
81 outcomes.push(BenchOutcome {
82 bench: *bench,
83 adapter,
84 report: Some(report),
85 skipped_reason: String::new(),
86 });
87 }
88 write_results_md(dir, &outcomes)
89}
90
91#[derive(Serialize, Deserialize)]
94pub struct BenchListEntry {
95 pub id: &'static str,
97 pub display: &'static str,
99 pub eta_seconds: u64,
101 pub dataset_bytes: u64,
103 pub description: &'static str,
105}
106
107#[must_use]
110pub fn list_benches() -> Vec<BenchListEntry> {
111 Bench::all()
112 .iter()
113 .map(|b| {
114 let m = b.metadata();
115 BenchListEntry {
116 id: m.id,
117 display: m.display,
118 eta_seconds: m.eta_seconds,
119 dataset_bytes: m.dataset_bytes,
120 description: m.description,
121 }
122 })
123 .collect()
124}