#![allow(
// Module docstring + CLI-help text reference LLM, JSON, RFC3339 —
// running clippy::doc_markdown over them adds noise without
// catching anything load-bearing.
clippy::doc_markdown,
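// NOTE(review): the rationale below is about owned `String` allocation
// cost, yet the two lints it precedes only waive `# Errors` /
// `# Panics` doc sections — confirm which lint it was written for.
// Bench wrapper allocates owned `String` summaries that flow into
// stdout / file writes; the per-call cost is negligible compared
// with the per-scenario reflection passes.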
clippy::missing_errors_doc,
clippy::missing_panics_doc
)]
use std::path::PathBuf;
#[path = "../benchmarks/longmemeval_reflection/dataset.rs"]
mod dataset;
#[path = "../benchmarks/longmemeval_reflection/runner.rs"]
mod runner;
/// Entry point for the longmemeval reflection bench wrapper.
///
/// Recognised flags (matched verbatim anywhere in the argument list):
/// - `--regenerate`: write freshly generated scenarios to the
///   `scenarios.jsonl` snapshot and exit without running the bench.
/// - `--load-snapshot`: read scenarios from that snapshot instead of
///   generating them in-process.
/// - `--test`: forwarded to `runner::run` as its final (smoke) argument.
///
/// A normal run writes the report as pretty JSON and as Markdown under
/// `<manifest dir>/target/bench/`, prints the Markdown and the JSON path
/// to stdout, and returns an error when `check_targets` reports any
/// failed gate.
fn main() -> anyhow::Result<()> {
    let argv: Vec<String> = std::env::args().collect();
    let has_flag = |flag: &str| argv.iter().any(|arg| arg == flag);
    let smoke = has_flag("--test");
    let regenerate = has_flag("--regenerate");
    let load_snapshot = has_flag("--load-snapshot");

    // Every path is anchored at the crate's manifest directory, which is
    // baked in at compile time.
    let repo_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let snapshot_path = ["benchmarks", "longmemeval_reflection", "data", "scenarios.jsonl"]
        .iter()
        .fold(repo_root.clone(), |path, segment| path.join(segment));

    // Regeneration short-circuits: refresh the snapshot on disk and exit.
    if regenerate {
        let fresh = dataset::generate_scenarios();
        let serialised = dataset::serialise_jsonl(&fresh);
        let snapshot_dir = snapshot_path.parent().expect("snapshot parent");
        std::fs::create_dir_all(snapshot_dir)?;
        std::fs::write(&snapshot_path, serialised)?;
        println!(
            "regenerated {} scenarios → {}",
            fresh.len(),
            snapshot_path.display()
        );
        return Ok(());
    }

    // Scenario source: the on-disk snapshot when requested, otherwise a
    // fresh in-process generation.
    let scenarios = if load_snapshot {
        match std::fs::read_to_string(&snapshot_path) {
            Ok(jsonl) => dataset::load_jsonl(&jsonl)?,
            Err(e) => anyhow::bail!(
                "scenarios.jsonl not found at {} — run with --regenerate first ({})",
                snapshot_path.display(),
                e
            ),
        }
    } else {
        dataset::generate_scenarios()
    };

    let llm = runner::DeterministicLlmStub::from_scenarios(&scenarios);
    let judge = runner::DeterministicJudge::default();
    let report = runner::run(&scenarios, &llm, &judge, smoke)?;

    // Persist both report renderings side by side under target/bench.
    let bench_dir = repo_root.join("target").join("bench");
    let json_path = bench_dir.join("longmemeval-reflection.json");
    let md_path = bench_dir.join("longmemeval-reflection.md");
    std::fs::create_dir_all(&bench_dir)?;

    let report_json = serde_json::to_string_pretty(&report)?;
    std::fs::write(&json_path, report_json)?;

    let md = report.render_markdown();
    std::fs::write(&md_path, &md)?;
    println!("{md}");
    println!("results: {}", json_path.display());

    // Gate check: list every failed target on stderr, then fail the run.
    if let Err(fails) = report.check_targets() {
        for f in &fails {
            eprintln!("GATE FAIL: {f}");
        }
        anyhow::bail!("{} spec gate(s) failed", fails.len());
    }

    println!("ALL GATES PASS");
    Ok(())
}