Skip to main content

kura_cli/commands/
eval.rs

1use clap::{Args, Subcommand};
2use tokio::process::Command;
3use uuid::Uuid;
4
5use crate::util::exit_error;
6
7#[derive(Subcommand)]
8pub enum EvalCommands {
9    /// Run baseline-vs-candidate shadow evaluation
10    Shadow(EvalShadowArgs),
11}
12
13#[derive(Args, Clone)]
14pub struct EvalCommonArgs {
15    /// User UUID whose inference projections should be replayed
16    #[arg(long)]
17    pub user_id: Uuid,
18
19    /// Optional projection type filter (repeatable)
20    #[arg(
21        long = "projection-type",
22        value_parser = ["semantic_memory", "strength_inference", "readiness_inference", "causal_inference"]
23    )]
24    pub projection_type: Vec<String>,
25
26    /// Engine override used during strength replay windows
27    #[arg(long, default_value = "closed_form", value_parser = ["closed_form", "pymc"])]
28    pub strength_engine: String,
29
30    /// Candidate cutoff used for semantic ranking metrics
31    #[arg(long, default_value_t = 5)]
32    pub semantic_top_k: u32,
33
34    /// Replay source mode
35    #[arg(long, default_value = "both", value_parser = ["projection_history", "event_store", "both"])]
36    pub source: String,
37
38    /// Do not persist run + artifacts in inference_eval tables
39    #[arg(long)]
40    pub no_persist: bool,
41}
42
43#[derive(Args, Clone)]
44pub struct EvalShadowArgs {
45    #[command(flatten)]
46    pub common: EvalCommonArgs,
47
48    /// Candidate strength engine (defaults to baseline strength engine)
49    #[arg(long, value_parser = ["closed_form", "pymc"])]
50    pub candidate_strength_engine: Option<String>,
51
52    /// Candidate replay source (defaults to baseline source)
53    #[arg(long, value_parser = ["projection_history", "event_store", "both"])]
54    pub candidate_source: Option<String>,
55
56    /// Candidate semantic top-k (defaults to baseline semantic-top-k)
57    #[arg(long)]
58    pub candidate_semantic_top_k: Option<u32>,
59}
60
61pub async fn run(command: EvalCommands) -> i32 {
62    match command {
63        EvalCommands::Shadow(args) => run_shadow(args).await,
64    }
65}
66
67async fn run_shadow(args: EvalShadowArgs) -> i32 {
68    let mut worker_args = build_common_worker_args(&args.common);
69    worker_args.push("--shadow".to_string());
70
71    if let Some(candidate_strength_engine) = args.candidate_strength_engine {
72        worker_args.push("--candidate-strength-engine".to_string());
73        worker_args.push(candidate_strength_engine);
74    }
75    if let Some(candidate_source) = args.candidate_source {
76        worker_args.push("--candidate-source".to_string());
77        worker_args.push(candidate_source);
78    }
79    if let Some(candidate_semantic_top_k) = args.candidate_semantic_top_k {
80        worker_args.push("--candidate-semantic-top-k".to_string());
81        worker_args.push(candidate_semantic_top_k.to_string());
82    }
83
84    execute_worker_eval_cli(&worker_args).await
85}
86
87fn build_common_worker_args(common: &EvalCommonArgs) -> Vec<String> {
88    let mut worker_args = vec![
89        "--user-id".to_string(),
90        common.user_id.to_string(),
91        "--strength-engine".to_string(),
92        common.strength_engine.clone(),
93        "--semantic-top-k".to_string(),
94        common.semantic_top_k.to_string(),
95        "--source".to_string(),
96        common.source.clone(),
97    ];
98
99    for projection_type in &common.projection_type {
100        worker_args.push("--projection-type".to_string());
101        worker_args.push(projection_type.clone());
102    }
103
104    if common.no_persist {
105        worker_args.push("--no-persist".to_string());
106    }
107
108    worker_args
109}
110
111async fn execute_worker_eval_cli(worker_args: &[String]) -> i32 {
112    let status = match Command::new("uv")
113        .args([
114            "run",
115            "--project",
116            "workers",
117            "python",
118            "-m",
119            "kura_workers.eval_cli",
120        ])
121        .args(worker_args)
122        .status()
123        .await
124    {
125        Ok(status) => status,
126        Err(err) => {
127            exit_error(
128                &format!("Failed to launch eval runner via uv: {err}"),
129                Some(
130                    "Ensure `uv` is installed and workers environment is available. Fallback: `uv run --project workers python -m kura_workers.eval_cli --shadow ...`",
131                ),
132            );
133        }
134    };
135
136    status.code().unwrap_or(1)
137}
138
#[cfg(test)]
mod tests {
    use super::{EvalCommonArgs, build_common_worker_args};
    use uuid::Uuid;

    const USER_ID: &str = "11111111-1111-1111-1111-111111111111";

    /// Builds a fixture with fixed baseline options and the given optional parts.
    fn sample_args(projection_type: &[&str], no_persist: bool) -> EvalCommonArgs {
        EvalCommonArgs {
            user_id: Uuid::parse_str(USER_ID).expect("fixture UUID literal is well-formed"),
            projection_type: projection_type.iter().map(|kind| kind.to_string()).collect(),
            strength_engine: "pymc".to_string(),
            semantic_top_k: 7,
            source: "event_store".to_string(),
            no_persist,
        }
    }

    /// Every field is serialized, and each flag is immediately followed by its
    /// own value. The whole sequence is compared because membership checks
    /// alone would not notice a flag paired with the wrong value, a reordered
    /// pair, or a duplicated argument.
    #[test]
    fn build_common_worker_args_serializes_required_fields() {
        let args = sample_args(&["semantic_memory", "strength_inference"], true);

        let serialized = build_common_worker_args(&args);

        assert_eq!(
            serialized,
            [
                "--user-id",
                USER_ID,
                "--strength-engine",
                "pymc",
                "--semantic-top-k",
                "7",
                "--source",
                "event_store",
                "--projection-type",
                "semantic_memory",
                "--projection-type",
                "strength_inference",
                "--no-persist",
            ]
        );
    }

    /// With no projection filter and persistence enabled, only the four
    /// always-present flag/value pairs are emitted.
    #[test]
    fn build_common_worker_args_omits_optional_flags_when_unset() {
        let args = sample_args(&[], false);

        let serialized = build_common_worker_args(&args);

        assert_eq!(
            serialized,
            [
                "--user-id",
                USER_ID,
                "--strength-engine",
                "pymc",
                "--semantic-top-k",
                "7",
                "--source",
                "event_store",
            ]
        );
    }
}