Skip to main content

kaizen/shell/
eval.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2use crate::core::config;
3use crate::eval::engine::{dry_run_candidates, run_evals};
4use crate::store::sqlite::Store;
5use anyhow::Result;
6use std::path::{Path, PathBuf};
7
8pub fn cmd_eval_run(
9    workspace: Option<&Path>,
10    since_days: u64,
11    dry_run: bool,
12    json: bool,
13) -> Result<()> {
14    let ws = resolve_ws(workspace)?;
15    let cfg = config::load(&ws)?;
16    let store = open_store(&ws)?;
17    let since_ms = since_ms_from_days(since_days);
18    if dry_run {
19        let rows = dry_run_candidates(&store, &cfg.eval, since_ms)?;
20        if json {
21            println!("{}", serde_json::to_string_pretty(&rows)?);
22        } else {
23            println!("dry-run: {} sessions would be evaluated", rows.len());
24            rows.iter().for_each(|s| println!("  {}", s.id));
25        }
26    } else {
27        let rows = run_evals(&store, &cfg.eval, &ws, since_ms, false)?;
28        if json {
29            println!("{}", serde_json::to_string_pretty(&rows)?);
30            return Ok(());
31        }
32        println!("evaluated {} session(s)", rows.len());
33        for r in &rows {
34            println!(
35                "  {} score={:.2} flagged={}",
36                r.session_id, r.score, r.flagged
37            );
38        }
39    }
40    Ok(())
41}
42
43pub fn cmd_eval_list(workspace: Option<&Path>, min_score: f64, json: bool) -> Result<()> {
44    let ws = resolve_ws(workspace)?;
45    let store = open_store(&ws)?;
46    let now = now_ms();
47    let rows = store.list_evals_in_window(0, now)?;
48    let filtered: Vec<_> = rows.iter().filter(|r| r.score >= min_score).collect();
49    if json {
50        println!("{}", serde_json::to_string_pretty(&filtered)?);
51    } else {
52        for r in &filtered {
53            println!(
54                "{}\tscore={:.2}\tflagged={}\t{}",
55                r.session_id, r.score, r.flagged, r.rationale
56            );
57        }
58    }
59    Ok(())
60}
61
62pub fn cmd_eval_prompt(workspace: Option<&Path>, session_id: &str, rubric_id: &str) -> Result<()> {
63    let ws = resolve_ws(workspace)?;
64    let store = open_store(&ws)?;
65    let session = store
66        .get_session(session_id)?
67        .ok_or_else(|| anyhow::anyhow!("session not found: {session_id}"))?;
68    let events = store.list_events_for_session(session_id)?;
69    let rubric = crate::eval::rubric::by_id(rubric_id)
70        .ok_or_else(|| anyhow::anyhow!("unknown rubric: {rubric_id}"))?;
71    println!(
72        "{}",
73        crate::eval::judge::build_prompt(rubric, &session, &events)
74    );
75    Ok(())
76}
77
78fn resolve_ws(workspace: Option<&Path>) -> Result<PathBuf> {
79    crate::core::workspace::resolve(workspace)
80}
81
82fn open_store(ws: &Path) -> Result<Store> {
83    Store::open(&crate::core::workspace::db_path(ws)?)
84}
85
/// Converts a look-back window of `days` into an absolute cutoff expressed in
/// milliseconds since the Unix epoch ("now minus `days` days").
///
/// Both the multiplication and the subtraction saturate, so a window that
/// reaches back past the epoch (or is too large to express in milliseconds)
/// yields `0` rather than overflowing.
fn since_ms_from_days(days: u64) -> u64 {
    const MS_PER_DAY: u64 = 86_400_000;
    // `days` is caller-supplied: a plain `*` would panic in debug builds and
    // silently wrap in release builds once `days` exceeds u64::MAX / MS_PER_DAY,
    // producing a bogus cutoff.
    let window_ms = days.saturating_mul(MS_PER_DAY);
    now_ms().saturating_sub(window_ms)
}

/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, the elapsed duration
/// falls back to its default (zero), so the function returns `0` instead of
/// failing.
fn now_ms() -> u64 {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    // `as_millis` yields a u128; the narrowing cast cannot lose bits for any
    // realistic clock value (u64 milliseconds cover hundreds of millions of years).
    since_epoch.as_millis() as u64
}