Skip to main content

kaizen/shell/
exp.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! `kaizen exp` — experiment CRUD + report rendering.
3
4use crate::core::config;
5use crate::core::repo::repo_head;
6use crate::experiment::store as exp_store;
7use crate::experiment::types::{
8    Binding, Classification, Criterion, Direction, Experiment, Metric, State, transition,
9};
10use crate::experiment::{self as exp};
11use crate::shell::cli::{maybe_scan_all_agents, open_workspace_read_store, workspace_path};
12use crate::store::Store;
13use anyhow::{Context, Result, anyhow};
14use std::path::Path;
15use std::time::{SystemTime, UNIX_EPOCH};
16
/// Inputs for creating a new experiment (consumed by `exp_new_text` / `cmd_new`).
pub struct NewArgs {
    /// Experiment name; stored on the record and mixed into its generated id.
    pub name: String,
    /// Stored as the experiment's hypothesis.
    pub hypothesis: String,
    /// Stored as the experiment's `change_description`.
    pub change: String,
    /// Metric name; must be accepted by `Metric::parse`.
    pub metric: String,
    /// Binding mode: `"git"`, `"branch"`, or `"manual"`; anything else is rejected.
    pub bind: String,
    /// Experiment duration in days.
    pub duration_days: u32,
    /// Target change in percent. A negative value selects `Direction::Decrease`,
    /// anything else `Direction::Increase`; the value itself is stored unchanged.
    pub target_pct: f64,
    /// Control commit for `bind = "git"`; when absent, the parent of the treatment
    /// commit is resolved via `git rev-parse`.
    pub control_commit: Option<String>,
    /// Treatment commit for `bind = "git"`; when absent, the workspace's repo HEAD is used.
    pub treatment_commit: Option<String>,
    /// Control branch; required when `bind = "branch"`.
    pub control_branch: Option<String>,
    /// Treatment branch; required when `bind = "branch"`.
    pub treatment_branch: Option<String>,
}
30
31pub fn exp_new_text(workspace: Option<&Path>, args: NewArgs) -> Result<String> {
32    let ws = workspace_path(workspace)?;
33    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
34    let metric =
35        Metric::parse(&args.metric).ok_or_else(|| anyhow!("unknown metric: {}", args.metric))?;
36    let binding = build_binding(&ws, &args)?;
37    let (direction, target_pct) = split_target(args.target_pct);
38    let created_at = now_ms();
39    let exp_rec = Experiment {
40        id: deterministic_exp_id(&args.name, created_at),
41        name: args.name.clone(),
42        hypothesis: args.hypothesis,
43        change_description: args.change,
44        metric,
45        binding,
46        duration_days: args.duration_days,
47        success_criterion: Criterion::Delta {
48            direction,
49            target_pct,
50        },
51        state: State::Draft,
52        created_at_ms: created_at,
53        concluded_at_ms: None,
54        guardrails: Vec::new(),
55    };
56    exp_store::save_experiment(&store, &exp_rec)?;
57    Ok(format!("created {} · {}\n", exp_rec.id, exp_rec.name))
58}
59
60pub fn cmd_new(workspace: Option<&Path>, args: NewArgs) -> Result<()> {
61    print!("{}", exp_new_text(workspace, args)?);
62    Ok(())
63}
64
65fn build_binding(ws: &Path, args: &NewArgs) -> Result<Binding> {
66    match args.bind.as_str() {
67        "git" => {
68            let treatment = match args.treatment_commit.clone() {
69                Some(v) => v,
70                None => repo_head(ws)?
71                    .ok_or_else(|| anyhow!("not a git repo; pass --treatment-commit"))?,
72            };
73            let control = match args.control_commit.clone() {
74                Some(v) => v,
75                None => parent_of(ws, &treatment)?,
76            };
77            Ok(Binding::GitCommit {
78                control_commit: control,
79                treatment_commit: treatment,
80            })
81        }
82        "branch" => {
83            let control = args
84                .control_branch
85                .clone()
86                .ok_or_else(|| anyhow!("--control-branch required for --bind branch"))?;
87            let treatment = args
88                .treatment_branch
89                .clone()
90                .ok_or_else(|| anyhow!("--treatment-branch required for --bind branch"))?;
91            Ok(Binding::Branch {
92                control_branch: control,
93                treatment_branch: treatment,
94            })
95        }
96        "manual" => Ok(Binding::ManualTag {
97            variant_field: "variant".into(),
98        }),
99        other => Err(anyhow!("unsupported bind: {other} (use git|branch|manual)")),
100    }
101}
102
103fn split_target(pct: f64) -> (Direction, f64) {
104    if pct < 0.0 {
105        (Direction::Decrease, pct)
106    } else {
107        (Direction::Increase, pct)
108    }
109}
110
111fn parent_of(ws: &Path, commit: &str) -> Result<String> {
112    let out = std::process::Command::new("git")
113        .arg("-C")
114        .arg(ws)
115        .args(["rev-parse", &format!("{commit}^")])
116        .output()
117        .context("git rev-parse parent")?;
118    if !out.status.success() {
119        return Err(anyhow!(
120            "git rev-parse failed: {}",
121            String::from_utf8_lossy(&out.stderr)
122        ));
123    }
124    Ok(String::from_utf8_lossy(&out.stdout).trim().to_string())
125}
126
127pub fn exp_list_text(workspace: Option<&Path>) -> Result<String> {
128    use std::fmt::Write;
129    let ws = workspace_path(workspace)?;
130    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
131    let all = exp_store::list_experiments(&store)?;
132    let mut out = String::new();
133    if all.is_empty() {
134        writeln!(&mut out, "(no experiments)").unwrap();
135        return Ok(out);
136    }
137    writeln!(
138        &mut out,
139        "{:<38} {:<10} {:<24} METRIC",
140        "ID", "STATE", "NAME"
141    )
142    .unwrap();
143    writeln!(&mut out, "{}", "-".repeat(96)).unwrap();
144    for e in &all {
145        writeln!(
146            &mut out,
147            "{:<38} {:<10?} {:<24} {}",
148            e.id,
149            e.state,
150            truncate(&e.name, 24),
151            e.metric.as_str()
152        )
153        .unwrap();
154    }
155    Ok(out)
156}
157
158pub fn cmd_list(workspace: Option<&Path>) -> Result<()> {
159    print!("{}", exp_list_text(workspace)?);
160    Ok(())
161}
162
163pub fn exp_status_text(workspace: Option<&Path>, id: &str) -> Result<String> {
164    use std::fmt::Write;
165    let ws = workspace_path(workspace)?;
166    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
167    let e = exp_store::load_experiment(&store, id)?
168        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
169    let mut out = String::new();
170    writeln!(&mut out, "id:         {}", e.id).unwrap();
171    writeln!(&mut out, "name:       {}", e.name).unwrap();
172    writeln!(&mut out, "state:      {:?}", e.state).unwrap();
173    writeln!(&mut out, "metric:     {}", e.metric.as_str()).unwrap();
174    writeln!(&mut out, "duration:   {}d", e.duration_days).unwrap();
175    writeln!(&mut out, "created:    {}", e.created_at_ms).unwrap();
176    if let Some(c) = e.concluded_at_ms {
177        writeln!(&mut out, "concluded:  {c}").unwrap();
178    }
179    writeln!(&mut out, "hypothesis: {}", e.hypothesis).unwrap();
180    writeln!(&mut out, "change:     {}", e.change_description).unwrap();
181    match &e.binding {
182        Binding::GitCommit {
183            control_commit,
184            treatment_commit,
185        } => {
186            writeln!(
187                &mut out,
188                "binding:    git control={control_commit} treatment={treatment_commit}"
189            )
190            .unwrap();
191        }
192        Binding::Branch {
193            control_branch,
194            treatment_branch,
195        } => {
196            writeln!(
197                &mut out,
198                "binding:    branch control={control_branch} treatment={treatment_branch}"
199            )
200            .unwrap();
201        }
202        Binding::ManualTag { variant_field } => {
203            writeln!(&mut out, "binding:    manual({variant_field})").unwrap();
204        }
205    }
206    Ok(out)
207}
208
209pub fn cmd_status(workspace: Option<&Path>, id: &str) -> Result<()> {
210    print!("{}", exp_status_text(workspace, id)?);
211    Ok(())
212}
213
214pub fn exp_tag_text(
215    workspace: Option<&Path>,
216    id: &str,
217    session_id: &str,
218    variant: &str,
219) -> Result<String> {
220    let ws = workspace_path(workspace)?;
221    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
222    let v = match variant {
223        "control" => Classification::Control,
224        "treatment" => Classification::Treatment,
225        "excluded" => Classification::Excluded,
226        other => {
227            return Err(anyhow!(
228                "variant must be control|treatment|excluded, got {other}"
229            ));
230        }
231    };
232    exp_store::tag_session(&store, id, session_id, v)?;
233    Ok(format!("tagged {session_id} -> {variant} for {id}\n"))
234}
235
236pub fn cmd_tag(workspace: Option<&Path>, id: &str, session_id: &str, variant: &str) -> Result<()> {
237    print!("{}", exp_tag_text(workspace, id, session_id, variant)?);
238    Ok(())
239}
240
241pub fn exp_report_text(
242    workspace: Option<&Path>,
243    id: &str,
244    json_out: bool,
245    refresh: bool,
246) -> Result<String> {
247    let ws = workspace_path(workspace)?;
248    let store = open_workspace_read_store(&ws, refresh)?;
249    let ws_str = ws.to_string_lossy().to_string();
250    if refresh {
251        let cfg = config::load(&ws)?;
252        maybe_scan_all_agents(&ws, &cfg, &ws_str, &store, true)?;
253    }
254    let exp_rec = exp_store::load_experiment(&store, id)?
255        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
256    let (start_ms, end_ms) = window_for(&exp_rec);
257    let manual = exp_store::manual_tags(&store, id)?;
258    let (sessions, values) =
259        metric_values_by_session(&store, &ws_str, start_ms, end_ms, exp_rec.metric)?;
260    let mut guardrail_values = std::collections::HashMap::new();
261    for guardrail in &exp_rec.guardrails {
262        let (_, values) =
263            metric_values_by_session(&store, &ws_str, start_ms, end_ms, guardrail.metric)?;
264        guardrail_values.insert(guardrail.metric, values);
265    }
266    let report = exp::run_from_metric_values(
267        &exp_rec,
268        &sessions,
269        &values,
270        &guardrail_values,
271        &manual,
272        &ws,
273        false,
274    );
275    if json_out {
276        Ok(serde_json::to_string_pretty(&report)?)
277    } else {
278        Ok(exp::to_markdown(&report))
279    }
280}
281
282pub fn cmd_report(workspace: Option<&Path>, id: &str, json_out: bool, refresh: bool) -> Result<()> {
283    print!("{}", exp_report_text(workspace, id, json_out, refresh)?);
284    Ok(())
285}
286
287pub fn exp_conclude_text(workspace: Option<&Path>, id: &str) -> Result<String> {
288    let ws = workspace_path(workspace)?;
289    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
290    let exp_rec = exp_store::load_experiment(&store, id)?
291        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
292    let next = transition(exp_rec.state, "conclude")
293        .ok_or_else(|| anyhow!("cannot conclude from {:?}", exp_rec.state))?;
294    exp_store::set_state(&store, id, next, now_ms())?;
295    Ok(format!("concluded {id}\n"))
296}
297
298pub fn cmd_conclude(workspace: Option<&Path>, id: &str) -> Result<()> {
299    print!("{}", exp_conclude_text(workspace, id)?);
300    Ok(())
301}
302
303pub fn exp_start_text(workspace: Option<&Path>, id: &str) -> Result<String> {
304    let ws = workspace_path(workspace)?;
305    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
306    let exp_rec = exp_store::load_experiment(&store, id)?
307        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
308    let next = transition(exp_rec.state, "start")
309        .ok_or_else(|| anyhow!("cannot start from {:?}", exp_rec.state))?;
310    exp_store::set_state(&store, id, next, now_ms())?;
311    Ok(format!("started {id}\n"))
312}
313
314pub fn cmd_start(workspace: Option<&Path>, id: &str) -> Result<()> {
315    print!("{}", exp_start_text(workspace, id)?);
316    Ok(())
317}
318
319pub fn exp_archive_text(workspace: Option<&Path>, id: &str) -> Result<String> {
320    let ws = workspace_path(workspace)?;
321    let store = Store::open(&crate::core::workspace::db_path(&ws)?)?;
322    let exp_rec = exp_store::load_experiment(&store, id)?
323        .ok_or_else(|| anyhow!("experiment not found: {id}"))?;
324    let next = transition(exp_rec.state, "archive")
325        .ok_or_else(|| anyhow!("cannot archive from {:?}", exp_rec.state))?;
326    exp_store::set_state(&store, id, next, now_ms())?;
327    Ok(format!("archived {id}\n"))
328}
329
330pub fn cmd_archive(workspace: Option<&Path>, id: &str) -> Result<()> {
331    print!("{}", exp_archive_text(workspace, id)?);
332    Ok(())
333}
334
335pub fn exp_power_text(
336    workspace: Option<&Path>,
337    metric: &str,
338    baseline_n: usize,
339    refresh: bool,
340) -> Result<String> {
341    use crate::experiment::stats::power;
342    use std::fmt::Write;
343
344    let ws = workspace_path(workspace)?;
345    let store = open_workspace_read_store(&ws, refresh)?;
346    let ws_str = ws.to_string_lossy().to_string();
347    if refresh {
348        let cfg = config::load(&ws)?;
349        maybe_scan_all_agents(&ws, &cfg, &ws_str, &store, true)?;
350    }
351
352    let metric_val = Metric::parse(metric).ok_or_else(|| anyhow!("unknown metric: {metric}"))?;
353    let now = now_ms();
354    let lookback_ms = 90 * 86_400_000_u64;
355    let values = store
356        .experiment_metric_values_in_window(
357            &ws_str,
358            now.saturating_sub(lookback_ms),
359            now,
360            metric_val,
361        )?
362        .into_iter()
363        .map(|(_, value)| value)
364        .collect::<Vec<_>>();
365
366    let mut out = String::new();
367    match power::mde(&values, baseline_n) {
368        None => writeln!(&mut out, "no data for metric {metric} in the last 90 days").unwrap(),
369        Some(r) => {
370            writeln!(&mut out, "metric:      {metric}").unwrap();
371            writeln!(&mut out, "baseline n:  {}", r.n_per_arm).unwrap();
372            writeln!(&mut out, "observed σ:  {:.3}", r.sigma).unwrap();
373            writeln!(&mut out, "MDE:         {:.3}", r.mde_absolute).unwrap();
374            if let Some(pct) = r.mde_pct {
375                writeln!(&mut out, "MDE %:       {:.1}%", pct).unwrap();
376            }
377            writeln!(
378                &mut out,
379                "\n(80% power · 95% CI · {n} sessions in baseline)",
380                n = values.len()
381            )
382            .unwrap();
383        }
384    }
385    Ok(out)
386}
387
388fn metric_values_by_session(
389    store: &Store,
390    ws: &str,
391    start_ms: u64,
392    end_ms: u64,
393    metric: Metric,
394) -> Result<(
395    Vec<crate::core::event::SessionRecord>,
396    std::collections::HashMap<String, f64>,
397)> {
398    let rows = store.experiment_metric_values_in_window(ws, start_ms, end_ms, metric)?;
399    let mut sessions = Vec::with_capacity(rows.len());
400    let mut values = std::collections::HashMap::with_capacity(rows.len());
401    for (session, value) in rows {
402        values.insert(session.id.clone(), value);
403        sessions.push(session);
404    }
405    Ok((sessions, values))
406}
407
408pub fn cmd_power(
409    workspace: Option<&Path>,
410    metric: &str,
411    baseline_n: usize,
412    refresh: bool,
413) -> Result<()> {
414    print!(
415        "{}",
416        exp_power_text(workspace, metric, baseline_n, refresh)?
417    );
418    Ok(())
419}
420
421fn window_for(e: &Experiment) -> (u64, u64) {
422    let end = e
423        .concluded_at_ms
424        .unwrap_or_else(|| e.created_at_ms + (e.duration_days as u64) * 86_400_000);
425    (e.created_at_ms, end.max(e.created_at_ms))
426}
427
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields 0 if the system clock reports a time before the epoch.
fn now_ms() -> u64 {
    let since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => std::time::Duration::default(),
    };
    since_epoch.as_millis() as u64
}
434
/// Derives the experiment id as a UUIDv5 of `"{name}:{created_at_ms}"` under a
/// fixed application namespace.
///
/// The id is a pure function of the name and the millisecond creation
/// timestamp, so two callers only obtain the same id when both values match
/// exactly.
fn deterministic_exp_id(name: &str, created_at_ms: u64) -> String {
    // Fixed namespace UUID: the ASCII bytes of "kaizen:exps", four zero bytes,
    // then 0x01. The bytes are used verbatim; they are not themselves a hash.
    // NOTE(review): changing these bytes or the key format below changes every
    // id generated from then on.
    const NS: uuid::Uuid = uuid::Uuid::from_bytes([
        0x6b, 0x61, 0x69, 0x7a, 0x65, 0x6e, 0x3a, 0x65, 0x78, 0x70, 0x73, 0x00, 0x00, 0x00, 0x00,
        0x01,
    ]);
    let key = format!("{name}:{created_at_ms}");
    uuid::Uuid::new_v5(&NS, key.as_bytes()).to_string()
}
446
/// Shortens `s` to at most `max` characters, ending with `…` when anything
/// was cut off.
///
/// Length is measured in `char`s throughout, not bytes, so a multibyte string
/// that already fits is returned unchanged. With `max == 0` the result is
/// empty, since not even the ellipsis fits.
fn truncate(s: &str, max: usize) -> String {
    // `nth(max)` is `None` exactly when `s` has at most `max` chars; this
    // avoids both the byte-length check and a full character count.
    if s.chars().nth(max).is_none() {
        return s.to_string();
    }
    if max == 0 {
        return String::new();
    }
    // Reserve one character of the budget for the ellipsis.
    let mut out: String = s.chars().take(max - 1).collect();
    out.push('…');
    out
}