use std::fs;
use std::io::Write as _;
use std::path::{Path, PathBuf};
use harn_vm::orchestration::{
context_eval_default_output_dir, evaluate_context_eval_manifest, load_context_eval_manifest,
ContextEvalReport, ContextEvalRunReport,
};
use crate::cli::EvalContextArgs;
use crate::dispatch;
use crate::env_guard::ScopedEnvVar;
/// Name of the environment variable that `render_via_dispatch` sets to the
/// JSON-serialised report before running the embedded `eval/context` script.
const CONTEXT_REPORT_ENV: &str = "HARN_EVAL_CONTEXT_REPORT_JSON";
/// Name of the environment variable that `render_via_dispatch` sets to the
/// requested render mode (this file passes `markdown`, `summary` or `json`).
const CONTEXT_OUTPUT_MODE_ENV: &str = "HARN_EVAL_CONTEXT_OUTPUT_MODE";
/// Async mutex acquired by `render_via_dispatch` before it sets the two
/// environment variables above and held until the embedded script has
/// finished — presumably so overlapping renders in one process cannot see
/// each other's environment values.
static DISPATCH_RENDER_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(());
pub async fn run(args: EvalContextArgs) -> i32 {
let report = match aggregate_report(&args) {
Ok(report) => report,
Err(code) => return code,
};
let output_dir = args.output.unwrap_or_else(context_eval_default_output_dir);
if let Err(error) = fs::create_dir_all(&output_dir) {
eprintln!("error: failed to create {}: {error}", output_dir.display());
return 1;
}
if let Err(error) = write_json_artifacts(&output_dir, &report) {
eprintln!("error: failed to write context eval outputs: {error}");
return 1;
}
let use_legacy = std::env::var("HARN_CLI_IMPL").as_deref() == Ok("rust");
if use_legacy {
if let Err(error) = write_markdown_legacy(&output_dir, &report) {
eprintln!("error: failed to write context eval markdown: {error}");
return 1;
}
announce_output_paths(&output_dir);
if args.json {
if let Err(code) = print_json_legacy(&report) {
return code;
}
} else {
print_summary_legacy(&report);
}
return post_render_exit_code(&report);
}
match write_markdown_dispatch(&output_dir, &report).await {
Ok(()) => {}
Err(code) => return code,
}
announce_output_paths(&output_dir);
if args.json {
if let Err(code) = print_json_dispatch(&report).await {
return code;
}
} else if let Err(code) = print_summary_dispatch(&report).await {
return code;
}
post_render_exit_code(&report)
}
/// Loads the manifest named by `args.manifest` and evaluates it.
///
/// On failure of either step the error is printed to stderr as
/// `error: <message>` and `Err(1)` is returned.
fn aggregate_report(args: &EvalContextArgs) -> Result<ContextEvalReport, i32> {
    /// Prints the failure to stderr and yields the exit code for it.
    fn fail<E: std::fmt::Display>(error: E) -> i32 {
        eprintln!("error: {error}");
        1
    }

    let manifest = load_context_eval_manifest(&args.manifest).map_err(fail)?;
    evaluate_context_eval_manifest(&manifest).map_err(fail)
}
/// Maps the report's overall verdict to an exit code: `0` when
/// `report.pass` is true, `1` otherwise.
fn post_render_exit_code(report: &ContextEvalReport) -> i32 {
    // `i32::from(bool)` yields 1 for true, so negate the pass flag.
    i32::from(!report.pass)
}
/// Prints to stderr the paths of the three artifacts (`summary.json`,
/// `per_run.jsonl`, `summary.md`) under `output_dir`.
fn announce_output_paths(output_dir: &Path) {
    let summary_json = output_dir.join("summary.json");
    let per_run_jsonl = output_dir.join("per_run.jsonl");
    let summary_md = output_dir.join("summary.md");
    eprintln!(
        "wrote {}, {}, and {}",
        summary_json.display(),
        per_run_jsonl.display(),
        summary_md.display()
    );
}
/// Writes the two JSON artifacts into `output_dir`: the whole report as
/// `summary.json`, then the report's runs as `per_run.jsonl`.
///
/// Returns the first failure as its string message; the JSONL file is not
/// attempted if the summary write fails.
fn write_json_artifacts(output_dir: &Path, report: &ContextEvalReport) -> Result<(), String> {
    let summary_path = output_dir.join("summary.json");
    write_json(summary_path, report)?;

    let per_run_path = output_dir.join("per_run.jsonl");
    write_jsonl(per_run_path, &report.runs)
}
/// Serialises `report` as pretty-printed JSON and writes it to `path`.
///
/// Serialisation and I/O errors are both returned as their string message.
fn write_json(path: PathBuf, report: &ContextEvalReport) -> Result<(), String> {
    match serde_json::to_string_pretty(report) {
        Ok(encoded) => fs::write(path, encoded).map_err(|error| error.to_string()),
        Err(error) => Err(error.to_string()),
    }
}
/// Writes `runs` to `path` as JSON Lines: one compact JSON object per run,
/// each followed by a single `\n`.
///
/// The file is created (or truncated) even when `runs` is empty. Output is
/// buffered so that each record does not cost two separate writes to the
/// underlying file.
///
/// # Errors
///
/// Returns the string message of the first file-creation, serialisation,
/// write, or flush error.
fn write_jsonl(path: PathBuf, runs: &[ContextEvalRunReport]) -> Result<(), String> {
    let file = fs::File::create(path).map_err(|error| error.to_string())?;
    let mut writer = std::io::BufWriter::new(file);
    for run in runs {
        let line = serde_json::to_string(run).map_err(|error| error.to_string())?;
        writer
            .write_all(line.as_bytes())
            .map_err(|error| error.to_string())?;
        writer.write_all(b"\n").map_err(|error| error.to_string())?;
    }
    // Flush explicitly: dropping a BufWriter discards any error from the
    // final write, which would make a truncated file look like success.
    writer.flush().map_err(|error| error.to_string())
}
/// Renders the report with `legacy_render_markdown` and writes the result
/// to `summary.md` inside `output_dir`.
///
/// An I/O failure is returned as its string message.
fn write_markdown_legacy(output_dir: &Path, report: &ContextEvalReport) -> Result<(), String> {
    let destination = output_dir.join("summary.md");
    let markdown = legacy_render_markdown(report);
    match fs::write(destination, markdown) {
        Ok(()) => Ok(()),
        Err(error) => Err(error.to_string()),
    }
}
/// Prints the report to stdout as pretty-printed JSON followed by a newline.
///
/// If serialisation fails, the error is reported on stderr and `Err(1)` is
/// returned; nothing is written to stdout in that case.
fn print_json_legacy(report: &ContextEvalReport) -> Result<(), i32> {
    serde_json::to_string_pretty(report)
        .map(|payload| println!("{payload}"))
        .map_err(|error| {
            eprintln!("error: failed to serialize context eval summary: {error}");
            1
        })
}
/// Prints a one-line human-readable summary to stdout: passed/total run
/// counts plus the two aggregate means, each with two decimal places.
fn print_summary_legacy(report: &ContextEvalReport) {
    let totals = &report.aggregate;
    let line = format!(
        "context eval: {}/{} passed, mean_correctness={:.2}, mean_tool_quality={:.2}",
        report.passed_runs,
        report.total_runs,
        totals.mean_final_correctness,
        totals.mean_tool_call_quality
    );
    println!("{line}");
}
/// Renders the report as a Markdown document.
///
/// The layout is a title (the manifest name, falling back to the manifest
/// id), a bullet list of aggregate figures, and a table with one row per
/// run. Task and mode ids go through `escape_md`; the cache key is emitted
/// verbatim inside backticks.
fn legacy_render_markdown(report: &ContextEvalReport) -> String {
    let title = match report.manifest_name.as_deref() {
        Some(name) => name,
        None => report.manifest_id.as_str(),
    };
    let status = if report.pass { "PASS" } else { "FAIL" };
    let totals = &report.aggregate;

    // Heading and aggregate bullet list.
    let mut doc = format!("# Context Eval: {}\n\n", title);
    doc += &format!(
        "- status: {}\n- runs: {}/{} passed\n- mean correctness: {:.4}\n- mean tool quality: {:.4}\n- input tokens: {}\n- output tokens: {}\n- cost USD: {:.6}\n\n",
        status,
        report.passed_runs,
        report.total_runs,
        totals.mean_final_correctness,
        totals.mean_tool_call_quality,
        totals.total_input_tokens,
        totals.total_output_tokens,
        totals.total_cost_usd,
    );

    // Per-run table: header, alignment row, then one line per run.
    doc += "| task | mode | pass | correctness | tools | reads before edit | input tokens | compactions | cache key |\n";
    doc += "|---|---|---:|---:|---:|---:|---:|---:|---|\n";
    doc.extend(report.runs.iter().map(|row| {
        let verdict = if row.passed { "yes" } else { "no" };
        format!(
            "| {} | {} | {} | {:.4} | {:.4} | {} | {} | {} | `{}` |\n",
            escape_md(&row.task_id),
            escape_md(&row.mode_id),
            verdict,
            row.final_correctness.score,
            row.tool_call_quality.score,
            row.reads_before_first_edit,
            row.input_tokens,
            row.compaction_count,
            row.cache.key,
        )
    }));
    doc
}
/// Returns `value` with every `|` prefixed by a backslash so the text can
/// sit inside a Markdown table cell. All other characters (including
/// existing backslashes) are copied through unchanged.
fn escape_md(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch == '|' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
/// Renders the report in `markdown` mode through the embedded script and
/// writes the result to `summary.md` inside `output_dir`.
///
/// A render failure propagates the code from `render_via_dispatch`; a
/// write failure is reported on stderr and returned as `Err(1)`.
async fn write_markdown_dispatch(output_dir: &Path, report: &ContextEvalReport) -> Result<(), i32> {
    let markdown = render_via_dispatch(report, "markdown").await?;
    fs::write(output_dir.join("summary.md"), markdown).map_err(|error| {
        eprintln!("error: failed to write context eval markdown: {error}");
        1
    })
}
/// Renders the report in `summary` mode through the embedded script and
/// prints it to stdout, adding a trailing newline when the rendered text
/// does not already end with one.
///
/// A render failure propagates the code from `render_via_dispatch`.
async fn print_summary_dispatch(report: &ContextEvalReport) -> Result<(), i32> {
    let rendered = render_via_dispatch(report, "summary").await?;
    if rendered.ends_with('\n') {
        print!("{rendered}");
    } else {
        println!("{rendered}");
    }
    Ok(())
}
/// Renders the report in `json` mode through the embedded script and prints
/// it to stdout, adding a trailing newline when the rendered text does not
/// already end with one.
///
/// A render failure propagates the code from `render_via_dispatch`.
async fn print_json_dispatch(report: &ContextEvalReport) -> Result<(), i32> {
    let rendered = render_via_dispatch(report, "json").await?;
    if rendered.ends_with('\n') {
        print!("{rendered}");
    } else {
        println!("{rendered}");
    }
    Ok(())
}
async fn render_via_dispatch(report: &ContextEvalReport, mode: &str) -> Result<String, i32> {
let report_json = match serde_json::to_string(report) {
Ok(json) => json,
Err(error) => {
eprintln!("error: failed to serialise ContextEvalReport for dispatch: {error}");
return Err(1);
}
};
let _guard = DISPATCH_RENDER_LOCK.lock().await;
let _report = ScopedEnvVar::set(CONTEXT_REPORT_ENV, &report_json);
let _mode = ScopedEnvVar::set(CONTEXT_OUTPUT_MODE_ENV, mode);
let outcome = dispatch::run_embedded_script("eval/context", Vec::new(), false).await;
if !outcome.stderr.is_empty() {
use std::io::Write as _;
let _ = std::io::stderr().write_all(outcome.stderr.as_bytes());
}
if outcome.exit_code != 0 {
return Err(outcome.exit_code);
}
Ok(outcome.stdout)
}