Skip to main content

kaizen/shell/
exp.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! `kaizen exp` — experiment CRUD + report rendering.
3
4use crate::core::config;
5use crate::core::repo::repo_head;
6use crate::experiment::store as exp_store;
7use crate::experiment::types::{
8    Binding, Classification, Criterion, Direction, Experiment, Metric, State, transition,
9};
10use crate::experiment::{self as exp};
11use crate::shell::cli::{maybe_scan_all_agents, open_workspace_read_store, workspace_path};
12use crate::store::Store;
13use anyhow::{Context, Result, anyhow};
14use std::path::Path;
15use std::time::{SystemTime, UNIX_EPOCH};
16
/// Arguments for `kaizen exp new`, consumed by [`exp_new_text`].
#[derive(Debug, Clone)]
pub struct NewArgs {
    /// Human-readable experiment name; also feeds the generated experiment id.
    pub name: String,
    /// Hypothesis text stored verbatim on the experiment.
    pub hypothesis: String,
    /// Description of the change under test, stored verbatim.
    pub change: String,
    /// Metric name; must be accepted by `Metric::parse`.
    pub metric: String,
    /// Binding kind: one of `git`, `branch`, `manual` or `prompt`.
    pub bind: String,
    /// Planned experiment length in days.
    pub duration_days: u32,
    /// Signed target change in percent; a negative value selects `Direction::Decrease`.
    pub target_pct: f64,
    /// Control commit for `--bind git`; defaults to the parent of the treatment commit.
    pub control_commit: Option<String>,
    /// Treatment commit for `--bind git`; defaults to the repository head.
    pub treatment_commit: Option<String>,
    /// Control branch; required for `--bind branch`.
    pub control_branch: Option<String>,
    /// Treatment branch; required for `--bind branch`.
    pub treatment_branch: Option<String>,
    /// Control prompt fingerprint; required for `--bind prompt`.
    pub control_fingerprint: Option<String>,
    /// Treatment prompt fingerprint; required for `--bind prompt`.
    pub treatment_fingerprint: Option<String>,
}
32
33pub fn exp_new_text(workspace: Option<&Path>, args: NewArgs) -> Result<String> {
34    let ws = workspace_path(workspace)?;
35    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
36    let metric =
37        Metric::parse(&args.metric).ok_or_else(|| anyhow!("unknown metric: {}", args.metric))?;
38    let binding = build_binding(&ws, &args)?;
39    let (direction, target_pct) = split_target(args.target_pct);
40    let created_at = now_ms();
41    let exp_rec = Experiment {
42        id: deterministic_exp_id(&args.name, created_at),
43        name: args.name.clone(),
44        hypothesis: args.hypothesis,
45        change_description: args.change,
46        metric,
47        binding,
48        duration_days: args.duration_days,
49        success_criterion: Criterion::Delta {
50            direction,
51            target_pct,
52        },
53        state: State::Draft,
54        created_at_ms: created_at,
55        concluded_at_ms: None,
56        guardrails: Vec::new(),
57    };
58    exp_store::save_experiment(&store, &exp_rec)?;
59    Ok(format!("created {} · {}\n", exp_rec.id, exp_rec.name))
60}
61
62pub fn cmd_new(workspace: Option<&Path>, args: NewArgs) -> Result<()> {
63    print!("{}", exp_new_text(workspace, args)?);
64    Ok(())
65}
66
67fn build_binding(ws: &Path, args: &NewArgs) -> Result<Binding> {
68    match args.bind.as_str() {
69        "git" => {
70            let treatment = match args.treatment_commit.clone() {
71                Some(v) => v,
72                None => repo_head(ws)?
73                    .ok_or_else(|| anyhow!("not a git repo; pass --treatment-commit"))?,
74            };
75            let control = match args.control_commit.clone() {
76                Some(v) => v,
77                None => parent_of(ws, &treatment)?,
78            };
79            Ok(Binding::GitCommit {
80                control_commit: control,
81                treatment_commit: treatment,
82            })
83        }
84        "branch" => {
85            let control = args
86                .control_branch
87                .clone()
88                .ok_or_else(|| anyhow!("--control-branch required for --bind branch"))?;
89            let treatment = args
90                .treatment_branch
91                .clone()
92                .ok_or_else(|| anyhow!("--treatment-branch required for --bind branch"))?;
93            Ok(Binding::Branch {
94                control_branch: control,
95                treatment_branch: treatment,
96            })
97        }
98        "manual" => Ok(Binding::ManualTag {
99            variant_field: "variant".into(),
100        }),
101        "prompt" => Ok(Binding::PromptFingerprint {
102            control_fingerprint: args
103                .control_fingerprint
104                .clone()
105                .ok_or_else(|| anyhow!("--control-fingerprint required for --bind prompt"))?,
106            treatment_fingerprint: args
107                .treatment_fingerprint
108                .clone()
109                .ok_or_else(|| anyhow!("--treatment-fingerprint required for --bind prompt"))?,
110        }),
111        other => Err(anyhow!(
112            "unsupported bind: {other} (use git|branch|manual|prompt)"
113        )),
114    }
115}
116
117fn split_target(pct: f64) -> (Direction, f64) {
118    if pct < 0.0 {
119        (Direction::Decrease, pct)
120    } else {
121        (Direction::Increase, pct)
122    }
123}
124
125fn parent_of(ws: &Path, commit: &str) -> Result<String> {
126    let out = std::process::Command::new("git")
127        .arg("-C")
128        .arg(ws)
129        .args(["rev-parse", &format!("{commit}^")])
130        .output()
131        .context("git rev-parse parent")?;
132    if !out.status.success() {
133        return Err(anyhow!(
134            "git rev-parse failed: {}",
135            String::from_utf8_lossy(&out.stderr)
136        ));
137    }
138    Ok(String::from_utf8_lossy(&out.stdout).trim().to_string())
139}
140
141pub fn exp_list_text(workspace: Option<&Path>) -> Result<String> {
142    use std::fmt::Write;
143    let ws = workspace_path(workspace)?;
144    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
145    let all = exp_store::list_experiments(&store)?;
146    let mut out = String::new();
147    if all.is_empty() {
148        writeln!(&mut out, "(no experiments)").unwrap();
149        return Ok(out);
150    }
151    writeln!(
152        &mut out,
153        "{:<38} {:<10} {:<24} METRIC",
154        "ID", "STATE", "NAME"
155    )
156    .unwrap();
157    writeln!(&mut out, "{}", "-".repeat(96)).unwrap();
158    for e in &all {
159        writeln!(
160            &mut out,
161            "{:<38} {:<10?} {:<24} {}",
162            e.id,
163            e.state,
164            truncate(&e.name, 24),
165            e.metric.as_str()
166        )
167        .unwrap();
168    }
169    Ok(out)
170}
171
172pub fn cmd_list(workspace: Option<&Path>) -> Result<()> {
173    print!("{}", exp_list_text(workspace)?);
174    Ok(())
175}
176
177pub fn exp_status_text(workspace: Option<&Path>, id: &str) -> Result<String> {
178    use std::fmt::Write;
179    let ws = workspace_path(workspace)?;
180    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
181    let e = exp_store::load_experiment(&store, id)?
182        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
183    let mut out = String::new();
184    writeln!(&mut out, "id:         {}", e.id).unwrap();
185    writeln!(&mut out, "name:       {}", e.name).unwrap();
186    writeln!(&mut out, "state:      {:?}", e.state).unwrap();
187    writeln!(&mut out, "metric:     {}", e.metric.as_str()).unwrap();
188    writeln!(&mut out, "duration:   {}d", e.duration_days).unwrap();
189    writeln!(&mut out, "created:    {}", e.created_at_ms).unwrap();
190    if let Some(c) = e.concluded_at_ms {
191        writeln!(&mut out, "concluded:  {c}").unwrap();
192    }
193    writeln!(&mut out, "hypothesis: {}", e.hypothesis).unwrap();
194    writeln!(&mut out, "change:     {}", e.change_description).unwrap();
195    match &e.binding {
196        Binding::GitCommit {
197            control_commit,
198            treatment_commit,
199        } => {
200            writeln!(
201                &mut out,
202                "binding:    git control={control_commit} treatment={treatment_commit}"
203            )
204            .unwrap();
205        }
206        Binding::Branch {
207            control_branch,
208            treatment_branch,
209        } => {
210            writeln!(
211                &mut out,
212                "binding:    branch control={control_branch} treatment={treatment_branch}"
213            )
214            .unwrap();
215        }
216        Binding::PromptFingerprint {
217            control_fingerprint,
218            treatment_fingerprint,
219        } => {
220            writeln!(
221                &mut out,
222                "binding:    prompt control={control_fingerprint} treatment={treatment_fingerprint}"
223            )
224            .unwrap();
225        }
226        Binding::ManualTag { variant_field } => {
227            writeln!(&mut out, "binding:    manual({variant_field})").unwrap();
228        }
229    }
230    Ok(out)
231}
232
233pub fn cmd_status(workspace: Option<&Path>, id: &str) -> Result<()> {
234    print!("{}", exp_status_text(workspace, id)?);
235    Ok(())
236}
237
238pub fn exp_tag_text(
239    workspace: Option<&Path>,
240    id: &str,
241    session_id: &str,
242    variant: &str,
243) -> Result<String> {
244    let ws = workspace_path(workspace)?;
245    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
246    let v = match variant {
247        "control" => Classification::Control,
248        "treatment" => Classification::Treatment,
249        "excluded" => Classification::Excluded,
250        other => {
251            return Err(anyhow!(
252                "variant must be control|treatment|excluded, got {other}"
253            ));
254        }
255    };
256    exp_store::tag_session(&store, id, session_id, v)?;
257    Ok(format!("tagged {session_id} -> {variant} for {id}\n"))
258}
259
260pub fn cmd_tag(workspace: Option<&Path>, id: &str, session_id: &str, variant: &str) -> Result<()> {
261    print!("{}", exp_tag_text(workspace, id, session_id, variant)?);
262    Ok(())
263}
264
265pub fn exp_report_text(
266    workspace: Option<&Path>,
267    id: &str,
268    json_out: bool,
269    refresh: bool,
270) -> Result<String> {
271    let ws = workspace_path(workspace)?;
272    let store = open_workspace_read_store(&ws, refresh)?;
273    let ws_str = ws.to_string_lossy().to_string();
274    if refresh {
275        let cfg = config::load(&ws)?;
276        maybe_scan_all_agents(&ws, &cfg, &ws_str, &store, true)?;
277    }
278    let exp_rec = exp_store::load_experiment(&store, id)?
279        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
280    let (start_ms, end_ms) = window_for(&exp_rec);
281    let manual = exp_store::manual_tags(&store, id)?;
282    let (sessions, values) =
283        metric_values_by_session(&store, &ws_str, start_ms, end_ms, exp_rec.metric)?;
284    let mut guardrail_values = std::collections::HashMap::new();
285    for guardrail in &exp_rec.guardrails {
286        let (_, values) =
287            metric_values_by_session(&store, &ws_str, start_ms, end_ms, guardrail.metric)?;
288        guardrail_values.insert(guardrail.metric, values);
289    }
290    let report = exp::run_from_metric_values(
291        &exp_rec,
292        &sessions,
293        &values,
294        &guardrail_values,
295        &manual,
296        &ws,
297        false,
298    );
299    if json_out {
300        Ok(serde_json::to_string_pretty(&report)?)
301    } else {
302        Ok(exp::to_markdown(&report))
303    }
304}
305
306pub fn cmd_report(workspace: Option<&Path>, id: &str, json_out: bool, refresh: bool) -> Result<()> {
307    print!("{}", exp_report_text(workspace, id, json_out, refresh)?);
308    Ok(())
309}
310
311pub fn exp_conclude_text(workspace: Option<&Path>, id: &str) -> Result<String> {
312    let ws = workspace_path(workspace)?;
313    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
314    let exp_rec = exp_store::load_experiment(&store, id)?
315        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
316    let next = transition(exp_rec.state, "conclude")
317        .ok_or_else(|| anyhow!("cannot conclude from {:?}", exp_rec.state))?;
318    exp_store::set_state(&store, id, next, now_ms())?;
319    Ok(format!("concluded {id}\n"))
320}
321
322pub fn cmd_conclude(workspace: Option<&Path>, id: &str) -> Result<()> {
323    print!("{}", exp_conclude_text(workspace, id)?);
324    Ok(())
325}
326
327pub fn exp_start_text(workspace: Option<&Path>, id: &str) -> Result<String> {
328    let ws = workspace_path(workspace)?;
329    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
330    let exp_rec = exp_store::load_experiment(&store, id)?
331        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
332    let next = transition(exp_rec.state, "start")
333        .ok_or_else(|| anyhow!("cannot start from {:?}", exp_rec.state))?;
334    exp_store::set_state(&store, id, next, now_ms())?;
335    Ok(format!("started {id}\n"))
336}
337
338pub fn cmd_start(workspace: Option<&Path>, id: &str) -> Result<()> {
339    print!("{}", exp_start_text(workspace, id)?);
340    Ok(())
341}
342
343pub fn exp_archive_text(workspace: Option<&Path>, id: &str) -> Result<String> {
344    let ws = workspace_path(workspace)?;
345    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
346    let exp_rec = exp_store::load_experiment(&store, id)?
347        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
348    let next = transition(exp_rec.state, "archive")
349        .ok_or_else(|| anyhow!("cannot archive from {:?}", exp_rec.state))?;
350    exp_store::set_state(&store, id, next, now_ms())?;
351    Ok(format!("archived {id}\n"))
352}
353
354pub fn cmd_archive(workspace: Option<&Path>, id: &str) -> Result<()> {
355    print!("{}", exp_archive_text(workspace, id)?);
356    Ok(())
357}
358
359pub fn exp_power_text(
360    workspace: Option<&Path>,
361    metric: &str,
362    baseline_n: usize,
363    refresh: bool,
364) -> Result<String> {
365    use crate::experiment::stats::power;
366    use std::fmt::Write;
367
368    let ws = workspace_path(workspace)?;
369    let store = open_workspace_read_store(&ws, refresh)?;
370    let ws_str = ws.to_string_lossy().to_string();
371    if refresh {
372        let cfg = config::load(&ws)?;
373        maybe_scan_all_agents(&ws, &cfg, &ws_str, &store, true)?;
374    }
375
376    let metric_val = Metric::parse(metric).ok_or_else(|| anyhow!("unknown metric: {metric}"))?;
377    let now = now_ms();
378    let lookback_ms = 90 * 86_400_000_u64;
379    let values = store
380        .experiment_metric_values_in_window(
381            &ws_str,
382            now.saturating_sub(lookback_ms),
383            now,
384            metric_val,
385        )?
386        .into_iter()
387        .map(|(_, value)| value)
388        .collect::<Vec<_>>();
389
390    let mut out = String::new();
391    match power::mde(&values, baseline_n) {
392        None => writeln!(&mut out, "no data for metric {metric} in the last 90 days").unwrap(),
393        Some(r) => {
394            writeln!(&mut out, "metric:      {metric}").unwrap();
395            writeln!(&mut out, "baseline n:  {}", r.n_per_arm).unwrap();
396            writeln!(&mut out, "observed σ:  {:.3}", r.sigma).unwrap();
397            writeln!(&mut out, "MDE:         {:.3}", r.mde_absolute).unwrap();
398            if let Some(pct) = r.mde_pct {
399                writeln!(&mut out, "MDE %:       {:.1}%", pct).unwrap();
400            }
401            writeln!(
402                &mut out,
403                "\n(80% power · 95% CI · {n} sessions in baseline)",
404                n = values.len()
405            )
406            .unwrap();
407        }
408    }
409    Ok(out)
410}
411
412fn metric_values_by_session(
413    store: &Store,
414    ws: &str,
415    start_ms: u64,
416    end_ms: u64,
417    metric: Metric,
418) -> Result<(
419    Vec<crate::core::event::SessionRecord>,
420    std::collections::HashMap<String, f64>,
421)> {
422    let rows = store.experiment_metric_values_in_window(ws, start_ms, end_ms, metric)?;
423    let mut sessions = Vec::with_capacity(rows.len());
424    let mut values = std::collections::HashMap::with_capacity(rows.len());
425    for (session, value) in rows {
426        values.insert(session.id.clone(), value);
427        sessions.push(session);
428    }
429    Ok((sessions, values))
430}
431
432pub fn cmd_power(
433    workspace: Option<&Path>,
434    metric: &str,
435    baseline_n: usize,
436    refresh: bool,
437) -> Result<()> {
438    print!(
439        "{}",
440        exp_power_text(workspace, metric, baseline_n, refresh)?
441    );
442    Ok(())
443}
444
445fn window_for(e: &Experiment) -> (u64, u64) {
446    let end = e
447        .concluded_at_ms
448        .unwrap_or_else(|| e.created_at_ms + (e.duration_days as u64) * 86_400_000);
449    (e.created_at_ms, end.max(e.created_at_ms))
450}
451
/// Current wall-clock time in milliseconds since the Unix epoch.
///
/// Returns 0 when the system clock reports a time before the epoch.
fn now_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_millis() as u64)
}
458
459/// Deterministic UUIDv5 from experiment name + creation timestamp.
460/// Stable across devices so concurrent creation of same experiment yields same ID.
461fn deterministic_exp_id(name: &str, created_at_ms: u64) -> String {
462    // Application-level namespace: "kaizen:experiments" hashed via UUIDv5 with DNS ns.
463    const NS: uuid::Uuid = uuid::Uuid::from_bytes([
464        0x6b, 0x61, 0x69, 0x7a, 0x65, 0x6e, 0x3a, 0x65, 0x78, 0x70, 0x73, 0x00, 0x00, 0x00, 0x00,
465        0x01,
466    ]);
467    let key = format!("{name}:{created_at_ms}");
468    uuid::Uuid::new_v5(&NS, key.as_bytes()).to_string()
469}
470
/// Shortens `s` to at most `max` characters, marking a cut with a trailing `…`.
///
/// Length is measured in `char`s, not bytes, so multi-byte text that already fits is
/// returned unchanged. A cut string keeps its first `max - 1` characters followed by
/// `…`; with `max == 0` the result is empty so it never exceeds the requested width.
fn truncate(s: &str, max: usize) -> String {
    // A character at index `max` exists only when `s` has more than `max` characters.
    if s.chars().nth(max).is_none() {
        return s.to_string();
    }
    if max == 0 {
        return String::new();
    }
    let mut out: String = s.chars().take(max - 1).collect();
    out.push('…');
    out
}