use crate::core::event::{Event, EventKind, SessionRecord};
use crate::eval::rubric::Rubric;
use crate::eval::types::{EvalRow, JudgeResponse};
use anyhow::{Context, Result, bail};
use serde_json::json;
/// Renders a compact, plain-text digest of a session for inclusion in a judge prompt.
///
/// The result consists of four newline-separated lines:
///
/// * `tools` — comma-separated tool names from `ToolCall` events, in event order
///   (tool-call events without a tool name are skipped);
/// * `errors` — number of `Error` events;
/// * `cost_usd` — sum of the per-event `cost_usd_e6` values divided by one million,
///   printed with four decimals;
/// * `duration_s` — whole seconds between `started_at_ms` and `ended_at_ms`, or `0`
///   when the session has no end timestamp.
fn summarise(session: &SessionRecord, events: &[Event]) -> String {
    let tool_seq: Vec<_> = events
        .iter()
        .filter(|e| e.kind == EventKind::ToolCall)
        .filter_map(|e| e.tool.as_deref())
        .collect();

    let errors = events.iter().filter(|e| e.kind == EventKind::Error).count();

    // Fold from an explicit +0.0 instead of calling `.sum()`: the float `Sum`
    // identity is `-0.0` on recent toolchains, which would make a session with
    // no cost events render as "-0.0000". Non-empty inputs are unaffected.
    let cost_usd = events
        .iter()
        .filter_map(|e| e.cost_usd_e6)
        .fold(0.0_f64, |total, micros| total + micros as f64 / 1_000_000.0);

    // `saturating_sub` keeps an end timestamp that precedes the start from
    // underflowing; the integer division truncates to whole seconds.
    let duration_s = session
        .ended_at_ms
        .map(|end| end.saturating_sub(session.started_at_ms) / 1000)
        .unwrap_or(0);

    format!(
        "tools: {}\nerrors: {}\ncost_usd: {:.4}\nduration_s: {}",
        tool_seq.join(", "),
        errors,
        cost_usd,
        duration_s,
    )
}
/// Builds the judge prompt for one session.
///
/// Every occurrence of the `{summary}` placeholder in the rubric's prompt
/// template is replaced with the digest produced by `summarise` for the given
/// session and events. A template without the placeholder is returned unchanged.
pub fn build_prompt(rubric: &Rubric, session: &SessionRecord, events: &[Event]) -> String {
    rubric
        .prompt_template
        .replace("{summary}", summarise(session, events).as_str())
}
/// Asks a judge model to score one session against a rubric.
///
/// Sends a blocking `POST` to `{endpoint}/v1/messages` (trailing slashes on
/// `endpoint` are ignored) carrying a single user message built by
/// [`build_prompt`], authenticated through the `x-api-key` header.
///
/// # Errors
///
/// Fails when the HTTP request cannot be sent, when the judge answers with a
/// non-success status, or when the response cannot be turned into an
/// [`EvalRow`] by `parse_judge_response`.
pub fn judge_session(
    client: &reqwest::blocking::Client,
    endpoint: &str,
    api_key: &str,
    model: &str,
    rubric: &Rubric,
    session: &SessionRecord,
    events: &[Event],
) -> Result<EvalRow> {
    let judge_prompt = build_prompt(rubric, session, events);
    let payload = json!({
        "model": model,
        "max_tokens": 256,
        "messages": [{"role": "user", "content": judge_prompt}],
    });

    let target = format!("{}/v1/messages", endpoint.trim_end_matches('/'));
    let request = client
        .post(&target)
        .header("x-api-key", api_key)
        .header("anthropic-version", "2023-06-01")
        .json(&payload);
    let resp = request.send().context("judge HTTP request failed")?;

    // Capture the status before `resp` is handed over to the parser.
    let status = resp.status();
    if status.is_success() {
        parse_judge_response(resp, session, model, rubric)
    } else {
        bail!("judge returned {}", status)
    }
}
/// Turns a judge HTTP response into an [`EvalRow`].
///
/// The response body is read as JSON, and the `text` of the first entry in its
/// `content` array is itself parsed as a JSON-encoded [`JudgeResponse`]. The
/// score is clamped to `[0.0, 1.0]`, and the row is flagged when the clamped
/// score is below `0.4`. The row id is `"{session id}:{rubric id}"`.
///
/// # Errors
///
/// Fails when the body is not valid JSON, when `content[0].text` is absent or
/// not a string, or when that text does not deserialize as a [`JudgeResponse`].
fn parse_judge_response(
    resp: reqwest::blocking::Response,
    session: &SessionRecord,
    model: &str,
    rubric: &Rubric,
) -> Result<EvalRow> {
    let payload: serde_json::Value = resp.json().context("judge response parse")?;

    // Walk content -> [0] -> text without panicking on unexpected shapes.
    let verdict_text = payload
        .get("content")
        .and_then(|blocks| blocks.get(0))
        .and_then(|block| block.get("text"))
        .and_then(|value| value.as_str())
        .context("missing text in judge response")?;

    let verdict: JudgeResponse =
        serde_json::from_str(verdict_text).context("judge JSON parse")?;

    let score = verdict.score.clamp(0.0, 1.0);
    let flagged = score < 0.4;

    Ok(EvalRow {
        id: format!("{}:{}", session.id, rubric.id),
        session_id: session.id.clone(),
        judge_model: String::from(model),
        rubric_id: rubric.id.to_owned(),
        score,
        rationale: verdict.rationale,
        flagged,
        created_at_ms: now_ms(),
    })
}
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` when the system clock reports a time before the epoch, and
/// saturates at `u64::MAX` instead of silently truncating if the millisecond
/// count does not fit in a `u64`.
fn now_ms() -> u64 {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    // Same value as `as_millis()`, but computed in `u64` with saturating
    // arithmetic so no narrowing `as` cast from `u128` is needed.
    since_epoch
        .as_secs()
        .saturating_mul(1000)
        .saturating_add(u64::from(since_epoch.subsec_millis()))
}