1use crate::core::config::EvalConfig;
3use crate::core::event::SessionRecord;
4use crate::eval::judge::judge_session;
5use crate::eval::rubric;
6use crate::eval::types::EvalRow;
7use crate::store::sqlite::Store;
8use anyhow::{Context, Result, bail};
9use std::path::Path;
10
11pub fn run_evals(
12 store: &Store,
13 cfg: &EvalConfig,
14 workspace: &Path,
15 since_ms: u64,
16 dry_run: bool,
17) -> Result<Vec<EvalRow>> {
18 if !cfg.enabled {
19 return Ok(vec![]);
20 }
21 let rubric =
22 rubric::by_id(&cfg.rubric).with_context(|| format!("unknown rubric: {}", cfg.rubric))?;
23 let api_key = resolve_api_key(cfg);
24 if api_key.is_empty() {
25 bail!("eval.api_key not set and ANTHROPIC_API_KEY env var is empty");
26 }
27 let client = reqwest::blocking::Client::new();
28 let candidates = store
29 .list_sessions_for_eval(since_ms, cfg.min_cost_usd)
30 .context("list sessions for eval")?;
31 let results = candidates
32 .iter()
33 .take(cfg.batch_size)
34 .filter_map(|s| eval_one(store, &client, cfg, rubric, s, dry_run))
35 .collect();
36 let _ = workspace;
37 Ok(results)
38}
39
40fn eval_one(
41 store: &Store,
42 client: &reqwest::blocking::Client,
43 cfg: &EvalConfig,
44 rubric: &crate::eval::rubric::Rubric,
45 session: &SessionRecord,
46 dry_run: bool,
47) -> Option<EvalRow> {
48 if dry_run {
49 eprintln!("[dry-run] would eval session {}", session.id);
50 return None;
51 }
52 let events = store.list_events_for_session(&session.id).ok()?;
53 let row = judge_session(
54 client,
55 &cfg.endpoint,
56 &resolve_api_key(cfg),
57 &cfg.model,
58 rubric,
59 session,
60 &events,
61 )
62 .ok()?;
63 store.upsert_eval(&row).ok()?;
64 Some(row)
65}
66
67fn resolve_api_key(cfg: &EvalConfig) -> String {
68 if !cfg.api_key.is_empty() {
69 return cfg.api_key.clone();
70 }
71 std::env::var("ANTHROPIC_API_KEY").unwrap_or_default()
72}