Skip to main content

kaizen/shell/
eval.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2use crate::core::config;
3use crate::eval::engine::run_evals;
4use crate::store::sqlite::Store;
5use anyhow::Result;
6use std::path::{Path, PathBuf};
7
8pub fn cmd_eval_run(workspace: Option<&Path>, since_days: u64, dry_run: bool) -> Result<()> {
9    let ws = resolve_ws(workspace)?;
10    let cfg = config::load(&ws)?;
11    let store = open_store(&ws)?;
12    let since_ms = since_ms_from_days(since_days);
13    let rows = run_evals(&store, &cfg.eval, &ws, since_ms, dry_run)?;
14    if dry_run {
15        println!("dry-run: {} sessions would be evaluated", rows.len());
16    } else {
17        println!("evaluated {} session(s)", rows.len());
18        for r in &rows {
19            println!(
20                "  {} score={:.2} flagged={}",
21                r.session_id, r.score, r.flagged
22            );
23        }
24    }
25    Ok(())
26}
27
28pub fn cmd_eval_list(workspace: Option<&Path>, min_score: f64, json: bool) -> Result<()> {
29    let ws = resolve_ws(workspace)?;
30    let store = open_store(&ws)?;
31    let now = now_ms();
32    let rows = store.list_evals_in_window(0, now)?;
33    let filtered: Vec<_> = rows.iter().filter(|r| r.score >= min_score).collect();
34    if json {
35        println!("{}", serde_json::to_string_pretty(&filtered)?);
36    } else {
37        for r in &filtered {
38            println!(
39                "{}\tscore={:.2}\tflagged={}\t{}",
40                r.session_id, r.score, r.flagged, r.rationale
41            );
42        }
43    }
44    Ok(())
45}
46
47pub fn cmd_eval_prompt(workspace: Option<&Path>, session_id: &str, rubric_id: &str) -> Result<()> {
48    let ws = resolve_ws(workspace)?;
49    let store = open_store(&ws)?;
50    let session = store
51        .get_session(session_id)?
52        .ok_or_else(|| anyhow::anyhow!("session not found: {session_id}"))?;
53    let events = store.list_events_for_session(session_id)?;
54    let rubric = crate::eval::rubric::by_id(rubric_id)
55        .ok_or_else(|| anyhow::anyhow!("unknown rubric: {rubric_id}"))?;
56    println!(
57        "{}",
58        crate::eval::judge::build_prompt(rubric, &session, &events)
59    );
60    Ok(())
61}
62
63fn resolve_ws(workspace: Option<&Path>) -> Result<PathBuf> {
64    crate::core::workspace::resolve(workspace)
65}
66
67fn open_store(ws: &Path) -> Result<Store> {
68    Store::open(&crate::core::workspace::db_path(ws)?)
69}
70
/// Returns the Unix timestamp, in milliseconds, lying `days` days before now.
///
/// Both the day-to-millisecond conversion and the subtraction saturate, so an
/// oversized `days` yields `0` instead of overflowing (which would panic in
/// debug builds and wrap to a bogus cutoff in release builds).
fn since_ms_from_days(days: u64) -> u64 {
    const MS_PER_DAY: u64 = 86_400_000;
    now_ms().saturating_sub(days.saturating_mul(MS_PER_DAY))
}

/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` when the system clock reports a time before the epoch, and
/// clamps to `u64::MAX` rather than truncating if the millisecond count does
/// not fit in a `u64`.
fn now_ms() -> u64 {
    let elapsed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    u64::try_from(elapsed.as_millis()).unwrap_or(u64::MAX)
}